# HG changeset patch # User cpt # Date 1656999779 0 # Node ID b8b8b52904a5d7b9d9cd50972ddee6c605d75a1c Uploaded diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/cpt_dnaplotter.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/cpt_dnaplotter.pl Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,465 @@ +#!/usr/bin/perl +# +# Code written by Eric Rasche +# mailto:rasche.eric@yandex.ru +# tel: 404.692.2048 +# http://eric.rasche.co.uk +# for +# Center for Phage Technology +# + +use strict; +use warnings; + +use CPT::GalaxyGetOpt; +use Data::Dumper; +use CPT::Bio; +use CPT::Circos::Conf; +my $bio = CPT::Bio->new(); + +my @argv_copy; +foreach(@ARGV){push(@argv_copy, "$_");} + +my $ggo = CPT::GalaxyGetOpt->new(); +my $options = $ggo->getOptions( + 'options' => [ + [ 'file|f', 'Input file', { validate => 'File/Input', + file_format => ['genbank', 'embl', 'txt'], + } ], + [], + ['Track Configuration'], + ['track_key', 'Key to select from genbank data', { validate => 'Genomic/Tag', required => 1, multiple => 1 } ], + ['track_feature_filter_invert', 'Should the qualifier search be inverted?', { validate => 'Option', options => { 'yes', 'Yes', 'no', 'No' } , multiple => 1 } ], + ['track_feature_filter_hastag', 'Select a tag which should be present in that qualifier (e.g., signal/tmhelix/pseudo)', { validate => 'String' , multiple => 1 } ], + ['track_feature_filter_textquery', 'Specify text which MUST be in that tag', { validate => 'String' , multiple => 1 } ], + ['track_feature_filter_strand', 'Which strand should the feature appear on?', { validate => 'Option', options => { 'f', 'Forward', 'r', 'Reverse', 'a', 'Any' } , multiple => 1 } ], + [], + ['enable_gc_skew_plot', 'Enable/Disable calculation of GC Skew Plot', { validate => 'Flag' } ], + ['gc_skew_plot_window_size', 'Window size for calculation of GC Skew', { validate => 'Int', min => 1000, default => 10000} ], + ['gc_skew_plot_step_size', 'Step size for calculation of GC Skew', { validate => 'Int', min => 200, default => 200 } ], + ], + 'outputs' => [ + [ + 'output_circos_confs', + 'Circos Configuration Files', + { + validate => 'File/Output', + required => 1, + default => 'out', + data_format => 'archive', + default_format => 'zip', + } + ], + ], + 'defaults' => [ + 'appid' => 'CircularDNAPlotter', + 'appname' => 'Circos based DNAPlotter', + 'appdesc' => 'plots genomes similar to Artemis\'s DNAPlotter', + 'appvers' => '1.94.1', + ], +); + +#perl cpt_dnaplotter.pl \ + #-f ../t/test-files/moon.gbk \ + #--track_key CDS --track_feature_filter_invert yes --track_feature_filter_hastag pseudo --track_feature_filter_strand f \ + #--track_key CDS --track_feature_filter_invert yes --track_feature_filter_hastag pseudo --track_feature_filter_strand r \ + #--track_key CDS --track_feature_filter_hastag pseudo --track_feature_filter_strand a \ + #--track_key tRNA --track_feature_filter_strand a \ + #--track_key CDS --track_feature_filter_hastag signal --track_feature_filter_strand a \ + #--track_key CDS --track_feature_filter_hastag tmhelix --track_feature_filter_strand a + + +my @reorg_args = (); + +my $cum_gc_ske_mean = 0; + + + +my %latest = (); +for(my $i = 0; $i < scalar(@argv_copy); $i++){ + my $c = $argv_copy[$i]; + # We have entered a new one block + if($c eq '--track_key'){ + # If we have loaded data + if(scalar(keys(%latest)) > 0){ + my %copy; + foreach(keys(%latest)){ + $copy{$_} = "" . $latest{$_}; + } + push(@reorg_args, \%copy); + } + + # Clean out latest to prep for new data + foreach(keys(%latest)){ + delete $latest{$_}; + } + } + + if($c =~ /^--track_(.*)/){ + $latest{$1} = $argv_copy[$i+1]; + # Artificially bump so we don't bother looking at the answer to + # this question. We can "get away" with this because none of + # the options are flags. However, I've disabled it in the event + # that flags ARE introduced + #$i++; + } +} +push(@reorg_args, \%latest); +#$VAR1 = [ + #{ + #'feature_filter_invert' => 'yes', + #'feature_plot_color' => '005500', + #'feature_filter_strand' => 'f', + #'feature_filter_hastag' => 'pseudo', + #'key' => 'CDS' + #}, + #{ + #'feature_filter_strand' => 'a', + #'key' => 'RBS' + #} + #]; +my @files = (); + +my $number_of_tracks = 0; +sub register_track { + #my $r0 = ( 90 - (10 * $number_of_tracks - 1)/2) / 100; + #my $r1 = ( 90 - (10 * $number_of_tracks - 9)/2) / 100; + my $r0 = ( 90 - (10 * $number_of_tracks - 1)/1) / 100; + my $r1 = ( 90 - (10 * $number_of_tracks - 9)/1) / 100; + $number_of_tracks++; + return ($r0, $r1); +} + +sub circosconf { + my $cc = CPT::Circos::Conf->new(); + $cc->include('etc/colors_fonts_patterns.conf'); + # Features to plot along the genome + $cc->include('ideogram.conf'); + # markings indicating position along genome + $cc->include('ticks.conf'); + # Genome data + $cc->set('karyotype', 'karyotype.conf'); + # Default image params are fine + $cc->start_block('image'); + $cc->include('etc/image.conf'); + $cc->end_block(); + # ??? + $cc->set('chromosome_units', '1000'); + $cc->set('chromosome_display_default', 'yes'); + #$cc->include('highlights.conf'); + $cc->include('plots.conf'); + + $cc->include('etc/housekeeping.conf'); + my $result = $cc->finalize(); + $cc = CPT::Circos::Conf->new(); + return $result; +} +sub ideogramconf{ + my $cc = CPT::Circos::Conf->new(); + $cc->start_block('ideogram'); + $cc->start_block('spacing'); + $cc->set('default','0u'); + $cc->set('break','0u'); + $cc->end_block(); + + $cc->set('thickness', '20p'); + $cc->set('stroke_thickness', '2'); + $cc->set('stroke_color', 'black'); + $cc->set('fill','no'); + $cc->set('fill_color','black'); + $cc->set('radius','0.85r'); + $cc->set('show_label','yes'); + $cc->set('label_font','default'); + $cc->set('label_radius','dims(ideogram,radius) + 0.05'); + $cc->set('label_size','36'); + $cc->set('label_parallel','yes'); + $cc->set('label_case','upper'); + + $cc->set('band_stroke_thickness','2'); + $cc->set('show_bands','yes'); + $cc->set('fill_bands','yes'); + $cc->end_block(); + + return $cc->finalize(); +} +sub generate_feature_table { + my ($filename, %filter) = @_; + print "Filtering on features\n"; + print Dumper \%filter; + my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); + # Only handing first sequence. + my $seq_object = $seqio_object->next_seq; + my $parent = $seq_object->display_id(); + # Feature data + my @features; + foreach my $feat($seq_object->get_SeqFeatures()){ + if($feat->primary_tag() eq $filter{key}){ + # If they've said "hastag" AND we do indeed have that tag AND we haven't inverted this filter. + if( + ($filter{feature_filter_hastag} && $feat->has_tag($filter{feature_filter_hastag}) && !$filter{feature_filter_invert}) + || + ($filter{feature_filter_hastag} && $filter{feature_filter_invert} && !$feat->has_tag($filter{feature_filter_hastag})) + || + (! $filter{feature_filter_hastag}) + ){ + if( + !$filter{feature_filter_strand} + || + ($feat->strand() == 1 && ( $filter{feature_filter_strand} eq 'f' || $filter{feature_filter_strand} eq 'a' )) + || + ($feat->strand() == -1 && ( $filter{feature_filter_strand} eq 'r' || $filter{feature_filter_strand} eq 'a' )) + || + ($feat->strand() == 0 && ( $filter{feature_filter_strand} eq 'a' )) + ){ + push(@features, join(' ', $parent, $feat->start, $feat->end)); + } + } + } + } + print "Found " . scalar @features . " features \n"; + push(@files, [ 'data/' . $filename, join("\n", @features) ] ); +} +sub plotsconf{ + my $cc = CPT::Circos::Conf->new(); + + $cc->start_block('plots'); + + + my $idx = 0; + foreach(@reorg_args){ + my %filter = %{$_}; + #{ + #'feature_filter_invert' => 'yes', + #'feature_plot_color' => '005500', + #'feature_filter_strand' => 'f', + #'feature_filter_hastag' => 'pseudo', + #'key' => 'CDS' + #}, + $idx++; + my $filename = sprintf('org.features.%s.txt', $idx); + generate_feature_table($filename, %filter); + + my ($r0,$r1) = register_track(); + $cc->start_block('plot'); + $cc->set('type','tile'); + $cc->set('file',$filename); + $cc->set('orientation', 'center'); + $cc->set('thickness', '30'); + $cc->set('r1', $r1 . 'r');# '0.78r'); + $cc->set('r0', $r0 . 'r');# '0.72r'); + $cc->set('layers','3'); + $cc->set('layers_overflow','collapse'); + $cc->set('color','paired-6-qual-' . $idx); + $cc->end_block(); + } + + if($options->{enable_gc_skew_plot}){ + my ($r0,$r1) = register_track(); + $cc->start_block('plot'); + $cc->set('type','histogram'); + $cc->set('file','gc.txt'); + $cc->set('r1',$r1 . 'r'); + $cc->set('r0',$r0 . 'r'); + $cc->set('fill_color','purple'); + $cc->set('orientation','in'); + $cc->start_block('rules'); + $cc->start_block('rule'); + $cc->set('condition','var(value) < 0'); + $cc->set('fill_color', 'green'); + $cc->end_block(); + $cc->end_block(); + $cc->end_block(); + } + + #$cc->start_block('plot'); + #$cc->set('type','histogram'); + #$cc->set('file','gc_cumulative.txt'); + #$cc->set('r1','0.6r'); + #$cc->set('r0','0.55r'); + #$cc->set('fill_color','purple'); + #$cc->set('orientation','out'); + #$cc->start_block('rules'); + #$cc->start_block('rule'); + #$cc->set('condition','var(value) < ' . $cum_gc_ske_mean); + #$cc->set('fill_color', 'green'); + #$cc->end_block(); + #$cc->end_block(); + #$cc->end_block(); + + $cc->end_block(); + return $cc->finalize(); +} +sub ticksconf{ + my $cc = CPT::Circos::Conf->new(); + + $cc->set('show_ticks','yes'); + $cc->set('show_tick_labels','yes'); + $cc->start_block('ticks'); + $cc->set('radius','1r'); + $cc->set('color','black'); + $cc->set('thickness','2p'); + $cc->set('multiplier','1e-3'); + $cc->set('format','%d'); + + $cc->start_block('tick'); + $cc->set('spacing','1000u'); + $cc->set('size','10p'); + $cc->end_block(); + + $cc->start_block('tick'); + $cc->set('spacing','10000u'); + $cc->set('size','15p'); + $cc->set('show_label','yes'); + $cc->set('label_size','20p'); + $cc->set('label_offset','10p'); + $cc->set('format','%d'); + $cc->end_block(); + $cc->end_block(); + return $cc->finalize(); +} +sub karyotype { + my @karyotype_data = (); + my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); + # Only handing first sequence. + my $seq_object = $seqio_object->next_seq; + # Main 'chromosome' data + push(@karyotype_data, join(' ', 'chr', '-',$seq_object->display_id(),$seq_object->display_id(),0, $seq_object->length(),'black')); + + return join("\n", @karyotype_data); +} + +sub gcgraph_cumulative { + my @gcdata = (); + my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); + # Only handing first sequence. + my $seq_object = $seqio_object->next_seq; + + my $parent = $seq_object->display_id(); + + my $seq = $seq_object->seq(); + my $sep = int($options->{gc_skew_plot_window_size}/2); + my $stepsep = int($options->{gc_skew_plot_step_size}/2); + my $cumulative_gc_skew = 0; + my @cumgc_vals; + + my $count = 0; + foreach(my $i = $sep; $i < $seq_object->length() - $sep - $options->{gc_skew_plot_step_size}; $i += $options->{gc_skew_plot_step_size}){ + $count++; + $cumulative_gc_skew += _calculate_gc_skew_for_seq(substr($seq,$i-$sep,$options->{gc_skew_plot_window_size})), + push(@cumgc_vals, $cumulative_gc_skew); + push(@gcdata, join(" ", + $parent, + $i - $stepsep, + $i + $stepsep, + $cumulative_gc_skew + )); + } + + my $sum = 0; + foreach(@cumgc_vals){$sum += $_;} + $cum_gc_ske_mean = $sum / $count; + + return join("\n", @gcdata); + # Main 'chromosome' data +} +sub gcgraph { + my @gcdata = (); + my $seqio_object = Bio::SeqIO->new(-file => $options->{file}, -format=>'genbank'); + # Only handing first sequence. + my $seq_object = $seqio_object->next_seq; + + my $parent = $seq_object->display_id(); + + my $seq = $seq_object->seq(); + my $sep = int($options->{gc_skew_plot_window_size}/2); + my $stepsep = int($options->{gc_skew_plot_step_size}/2); + foreach(my $i = $sep; $i < $seq_object->length() - $sep - $options->{gc_skew_plot_step_size}; $i += $options->{gc_skew_plot_step_size}){ + push(@gcdata, join(" ", + $parent, + $i - $stepsep, + $i + $stepsep, + _calculate_gc_skew_for_seq(substr($seq,$i-$sep,$options->{gc_skew_plot_window_size})), + )); + } + return join("\n", @gcdata); + # Main 'chromosome' data +} +sub _calculate_gc_skew_for_seq { + my ($seq) = @_; + $seq = uc($seq); + my %counts; + foreach(split //,$seq){ + $counts{$_}++; + } + if($counts{G} + $counts{C} > 0){ + return ($counts{G} - $counts{C}) / ($counts{G} + $counts{C}); + } + return 0; +} + +push(@files, [ 'etc/karyotype.conf', karyotype() ]); +push(@files, [ 'etc/circos.conf', circosconf() ]); +push(@files, [ 'etc/ideogram.conf', ideogramconf() ]); +if($options->{enable_gc_skew_plot}){ + push(@files, [ 'data/gc.txt', gcgraph() ]); + push(@files, [ 'data/gc_cumulative.txt', gcgraph_cumulative() ]); +} + +push(@files, [ 'etc/plots.conf', plotsconf() ]); +push(@files, [ 'etc/ticks.conf', ticksconf() ]); + + +use Archive::Any::Create; +my $archive = Archive::Any::Create->new(); +$archive->container('conf'); +foreach(@files){ + my ($location, $content) = @{$_}; + $archive->add_file($location, $content); +} + +use CPT::OutputFiles; +my $crr_output = CPT::OutputFiles->new( + name => 'output_circos_confs', + GGO => $ggo, +); +$crr_output->CRR(data => $archive); + +=head1 NAME + +DNAPlotter + +=head1 DESCRIPTION + +Much like artemis's DNAPlotter, this tool plots genomes in a ciruclar fashion, and can plot gc-deviation tracks as well. The options are somewhat reduced compared to artemis, so if you need something that isn't available in this version please file a bug report. + +=head1 USE + +Each track has several parameters: + +=over 4 + +=item track_key + +This selects a set of features from a genbank file, e.g., CDSs or tRNAs + +=item track_feature_filter_invert + +This parameter will invert (negate) the results of whatever query parameters you specify after it. + +=item track_feature_filter_hastag + +Require that a feature has a specific tag.... + +=item track_feature_filter_textquery + +...with this specific text in it + +=item track_feature_filter_strand + +Which strand should the feature be on (not inverted) + +=back + +Additionally, users are able to enable/disable GC skew plots. it's suggested that these are generally left alone, as they can quickly increase runtime. + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/cpt_genbank2circosk.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/cpt_genbank2circosk.pl Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,131 @@ +#!/usr/bin/perl +# +# Code written by Eric Rasche +# mailto:rasche.eric@yandex.ru +# tel:404.692.2048 +# http://eric.rasche.co.uk +# for +# Center for Phage Technology +# + +use strict; +use warnings; + +use CPT::GalaxyGetOpt; +use Data::Dumper; + +my $ggo = CPT::GalaxyGetOpt->new(); +my %colors = map { $_ => $_ } qw(red orange yellow green blue gray black white); +my %intensity = map { $_ => $_ } qw (vvvvl vvvl vvl vl vd vvd vvvd vvvvd); + +my $options = $ggo->getOptions( + 'options' => [ + [ + 'file', + 'Input file', + { + required => 1, + validate => 'File/Input', + file_format => ['genbank', 'embl', 'txt'], + } + ], + [ + 'chromosome' => 'Name for the + chromosome inside Circos', + { + required => 1, + validate => 'String' + } + ], + [ + 'color' => 'Color to use for Circos plot', + { + required => 1, + validate => 'Option', + options => \%colors, + } + ], + [ + 'intensity' => 'Circos color intensity. ', + { + validate => 'Option', + options => \%intensity, + } + ], + ], + 'outputs' => [ + [ + 'circosk', + 'Circos Karyotype File', + { + validate => 'File/Output', + required => 1, + default => 'karyotype', + data_format => 'text/plain', + default_format => 'TXT' + } + ], + ], + 'defaults' => [ + 'appname' => 'Genbank2CircosK', + 'appid' => 'Genbank2CircosK', + 'appvers' => '1.94', + 'appdesc' => +'Convert genbank files to Circos Karyotype configuration files', + ], + 'tests' => [ + { + test_name => "Default", + params => { + 'file' => 'test-data/inputs/multi.gbk', + 'chromosome' => 'test', + 'color' => 'red', + 'intensity' => 'vvvl', + }, + outputs => { + 'circosk' => ['karyotype.txt', 'test-data/outputs/circosk.conf'], + } + }, + ], +); + +use CPT::Bio; +my $bio = CPT::Bio->new(); + +my @results; +my $c = 0; +my $seqio = $bio->requestCopyIO( file => $options->{file} ); + +while(my $seqobj = $seqio->next_seq()){ + foreach my $feat ( $seqobj->get_SeqFeatures () ) { + + #band test 12 CDS__test_1gbk 5715 6335 red] + next if ( $feat->primary_tag ne 'CDS' ); + my $id = $bio->_getIdentifier($feat); + $id =~ s/\s+/_/g; + push( + @results, + join( + ' ', + ( + 'band', + $options->{'chromosome'}, + $c++, + $id, + $feat->start, + $feat->end, + (defined $options->{'intensity'} ? $options->{'intensity'} : '') . $options->{'color'} + ) + ) + ); + } +} + +my $z = join( "\n", @results ); + +use CPT::OutputFiles; +my $output = CPT::OutputFiles->new( + name => 'circosk', + GGO => $ggo, +); +$output->CRR(data => $z); diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/cpt_psm_0_prep.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/cpt_psm_0_prep.pl Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,194 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Storable; +use Bio::SearchIO; +use Bio::SeqIO; +use Bio::Tools::CodonTable; +use Data::Dumper; +use File::Temp qw/tempfile tempdir/; + + +use CPT::GalaxyGetOpt; +my $ggo = CPT::GalaxyGetOpt->new(); +my $options = $ggo->getOptions( + 'options' => [ + [ 'file', 'Input file', { validate => 'File/Input', multiple => 1, required => 1, + file_format => ['genbank', 'txt'], + } ], + ], + 'outputs' => [ + [ + 'cpt_psm_object', + 'Output PSM Object', + { + validate => 'File/Output', + required => 1, + default => 'cpt_psm', + data_format => 'text/plain', + default_format => 'TXT' + } + ], + ], + 'defaults' => [ + 'appid' => 'PSM.Prep', + 'appname' => 'PSM Prep', + 'appdesc' => 'prepares data for the PSM Plotter', + 'appvers' => '1.94.2', + ], + 'tests' => [], +); + + + +use CPT::Bio; +my $bio = CPT::Bio->new(); + +my @genbank_files = @{$options->{file}}; + +my %data = ( + file_list => [], +); + +my $GLOBAL_UNLINK_VAR = 1; +my $tempdir = tempdir('cpt.psm2.XXXXXXX',CLEANUP => $GLOBAL_UNLINK_VAR); +use CPT::Util::CRC64; +my $crc = CPT::Util::CRC64->new(); + +foreach my $file(@genbank_files){ + my $seqio_object = Bio::SeqIO->new(-file => $file,-format=>'genbank'); + while(my $seqobj = $seqio_object->next_seq){ + my ( $fh, $path ) = tempfile('cds_export.XXXXXXXXX', UNLINK => $GLOBAL_UNLINK_VAR, DIR => $tempdir, SUFFIX => '.fa'); + + my @gi_array; + foreach my $feat ( $seqobj->get_SeqFeatures ) { + if($feat->primary_tag eq 'CDS'){ + my $header = $bio->_getIdentifier($feat); + # This ensures proteins have a file-specific ID appeneded to them. + my $seq = $bio->translate( + $bio->intelligent_get_seq($feat)); + + # Proteins come with translated stop codon + $seq =~ s/\*//g; + $seq =~ s/\+//g; + $seq =~ s/#//g; + $header .= "_" . $crc->crc64($seq); + push @gi_array, $header; + print $fh ">$header\n$seq\n"; + } + } + $data{gbk}{$seqobj->display_id()}{'gi'} = \@gi_array; + $data{gbk}{$seqobj->display_id()}{'fasta_location'} = $path; + $data{gbk}{$seqobj->display_id()}{'gbk_location'} = $file; + push(@{$data{file_list}}, $seqobj->display_id()); + close $fh; + } +} + + + +use IPC::Run3; + +# Concatenate Fasta Files +my @fasta_files; +foreach(@{$data{file_list}}){ + push(@fasta_files, $data{gbk}{$_}{fasta_location}); +} +my @command = ('cat', @fasta_files); +my ($merged_fa_fh, $merged_fa_path) = tempfile('merged.XXXXXXXXX', UNLINK => 1, DIR => $tempdir, SUFFIX => '.fa'); +my ($in, $out, $err); +run3 \@command, \$in, \$out, \$err; +if($err){ + print STDERR $err; +} +print $merged_fa_fh $out; +close($merged_fa_fh); + + +# Create Blast Database +my ($blastdb_fh, $blastdb_path) = tempfile('blastdb.XXXXXXXXX', UNLINK => 1, DIR => $tempdir); +@command = ('makeblastdb', + '-dbtype', 'prot', + '-in', $merged_fa_path, + '-out', $blastdb_path, +); +my ($makeblast_in,$makeblast_out,$makeblast_err); +run3 \@command, \$makeblast_in, \$makeblast_out, \$makeblast_err; + +# Blast files +foreach(@{$data{file_list}}){ + #push(@fasta_files, $data{gbk}{$_}{fasta_location}); + my @blast_cmd = ('blastp', + '-query', $data{gbk}{$_}{fasta_location}, + '-out', $data{gbk}{$_}{fasta_location} . ".xml", + '-outfmt', '5', + '-db', $blastdb_path, + ); + my ($blast_in,$blast_out,$blast_err); + run3 \@blast_cmd, \$blast_in, \$blast_out, \$blast_err; +} + +my $value; +my @data_tsv; + +foreach(@{$data{file_list}}){ + #push(@fasta_files, $data{gbk}{$_}{fasta_location}); + my $file = $data{gbk}{$_}{fasta_location}; + + my $in = new Bio::SearchIO( + -format => 'blastxml', + -tempfile => 1, + -file => "$file.xml", + ); + while( my $result = $in->next_result ) { + while( my $hit = $result->next_hit ) { + while( my $hsp = $hit->next_hsp ) { + my $Identity = $hsp->percent_identity/100 * $hsp->length('query'); + my $IterationQueryLength = $result->query_length(); + my $HitLength = $hit->length(); + my $dice = (200*$Identity)/($HitLength + $IterationQueryLength); + + my $c = $result->query_description();#genome_a_header + my $d = $hit->name();#genome_b_header + # Skip self-self links + next if($c eq $d); + + push (@data_tsv, + [ + $c, + $d, + $hsp->evalue(), + $dice, + ] + ); + + }#close hsp + }#close hit + }#close result +}#close for genomes + +$data{hit_table} = \@data_tsv; + +use CPT::OutputFiles; +my $psmout = CPT::OutputFiles->new( + name => 'cpt_psm_object', + GGO => $ggo, +); +my @val = $psmout->CRR(data => "none", extension => 'psm'); +store \%data,$val[0]; + + + + + + + +=head1 NAME + +PSM Prep + +=head1 DESCRIPTION + +This tool takes in 2 or more GenBank files, blasts, and prepares data structures for use in the companion tool: PSM Plotter. Select as many (multi)-gbk files as you I want to plot. Once this tool is done, you can select any subset of those to plot then. + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/cpt_psm_1_plot.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/cpt_psm_1_plot.pl Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,440 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Storable; +use CPT::GalaxyGetOpt; +use CPT::Bio::NW_MSA; +use Data::Dumper; +use CPT::Circos::Conf; +use POSIX; + + +my $ggo = CPT::GalaxyGetOpt->new(); +my $options = $ggo->getOptions( + 'options' => [ + [ 'file', 'PSM2 Data File', { validate => 'File/Input', required => 1 } ], + [ 'user_ordering', 'List of genome IDs used in the analysis, can be comma/space/newline separated.', { validate => 'String', required => 1 }], + [], + ['Plot Options'], + ['percent_filled' , 'Percentage of a whole block that an individual gene is' , { validate => 'Float', default=>'0.8', min => '0.1', max => '1.0' }], + ['ig_dist' , 'Maximum length of links between genome comparisons' , { validate => 'Int', default => 100 }], + ['stroke_thickness' , 'Thickness of inter-genome links' , { validate => 'Int', default => '2', min => 1, max => 10 } ], + ['every_nth' , 'Plot every Nth gene a modified version of the main color for that genome', { validate => 'Int', default => '20'}], + [], + ['Cutoffs'], + ['evalue' , 'Evalue cutoff' , { validate => 'Float' , default => 1e-4 } ] , + ['dice' , 'Dice cutoff' , { validate => 'Float' , default => 50 } ] , + [], + ['Alignment Options'], + ['mismatch' , 'Mismatch Score' , { validate => 'Float' , default => -0.1 } ] , + ['gap_penalty' , 'Gap Penalty' , { validate => 'Float' , default => '0.0' } ] , + ['match' , 'Match Score' , { validate => 'Float' , default => 5 } ] , + ], + 'outputs' => [ + [ + 'output_circos_confs', + 'Output Circos Conf Object', + { + validate => 'File/Output', + required => 1, + default => 'psm3', + data_format => 'archive', + default_format => 'tar.gz', + } + ], + ], + 'defaults' => [ + 'appid' => 'PSM.Plot', + 'appname' => 'PSM Plotter', + 'appdesc' => 'plots data from PSM Prep', + 'appvers' => '1.94.2', + ], + 'tests' => [ + ], +); + +my $percent_filled = $options->{percent_filled}; +my $width = 1000*$percent_filled; +my $spacing = 1000-$width; + + + +my $offset = ($width+$spacing)/2; +my $full_increment = $width+$spacing; +my $halfwidth = $width/2; + +#my %option_map = ( + #'offset' => ($width+$spacing)/2, + #'full_increment' => $width+$spacing, + #'halfwidth' => $width/2, + #'gap_penalty' => $options->{gap_penalty}, + #'match' => $options->{match}, + #'heatmap' => 1, + #'heatmap_low' => hex("0xCCCCCC"), + #'heatmap_high' => hex("0x333333"), + #'heatmap_bucket' => 8, + #'every_nth' => $options->{every_nth}, + #'user_ordering' => $options->{user_ordering}, + #'dice' => $options->{dice}, + #'evalue' => $options->{evalue}, +#); +#Color/name correspondance, to be used in writing the circos-0.63-pre1 files + +my @user_ordering; +push(@user_ordering, split(/[,\n\r\s]+/, $options->{user_ordering})); + +my @aligned_results; +my %fh_relationship; +my %precomputed_colour_hash; +my %protein_position_quicklookup; +my %response = (); +my %data_file = %{retrieve($options->{file})}; + +my %uo_idx; +for(my $i=0;$i "link text" +my %linkages = %{linkages()}; +# a => b => "link text" + +sub align{ + print STDERR "Aliging genomes\n"; + my $msa = CPT::Bio::NW_MSA->new( + gap_penalty => $options->{'gap_penalty'}, + match_score => $options->{'match'}, + mismatch_score => $options->{'mismatch'}, + bidi => 1, + ); + + my @hits = @{$data_file{hit_table}}; + + foreach my $hit(@hits){ + my ($from, $to, $evalue, $dice) = @{$hit}; + if($evalue < $options->{evalue} && $dice > $options->{dice}){ + if($options->{verbose}){ + print "$from $to\n"; + } + $msa->add_relationship($from, $to); + } + } + + foreach my $genome(@user_ordering){ + print STDERR "\tAligning $genome\n"; + my $gi_list_ref = $data_file{gbk}{$genome}{gi};#"GI" list + if(! defined $gi_list_ref){ + warn "Could not find $genome genome in the data file. Please be sure you have correctly specified the name of a genome from a genbank file. (See the LOCUS line for the name)."; + }else{ + $msa->align_list($gi_list_ref); + } + } + @aligned_results = $msa->merged_array(); +} +sub compmap_proteins{ + my @Narr = ();#Keep count of how many items we've had in a single + #column, so the modulus whe we're colouring them in will work properly, + #rather than being on every Nth radial colum in the plot + # + my $_max = scalar @aligned_results; + my %protein_files; + + #for(my $i = scalar @aligned_results - 1; $i >= 0; $i--){ + for(my $i = 0; $i < scalar @aligned_results; $i++){ + # Get the current row from the PSM result object + my @current_row = @{$aligned_results[$i]}; + if($options->{verbose}){ + print join("\t", @current_row) , "\n"; + } + + for(my $j = 0; $j < scalar @current_row; $j++){ + if($current_row[$j] ne "-"){ + $protein_position_quicklookup{$current_row[$j]} = [$i,$j]; + $Narr[$j]++; + my $color_str = ''; + if($Narr[$j] % $options->{every_nth} == 0){ + $color_str = 'fill_color=accent-8-qual-inv-' . ($j+1) . ','; + } + my $str = join(' ', + 'compmap ', + (($i+1) * $full_increment - $halfwidth ), + (($i+1) * $full_increment + $halfwidth ), + "${color_str}f=". $current_row[$j] + ); + $protein_files{$user_ordering[$j]} .= $str . "\n"; + } + } + } + return \%protein_files; +} +sub linkages{ + my @hits = @{$data_file{hit_table}}; + my %links; + foreach my $hit (@hits){ + my ($from, $to, $evalue, $dice) = @{$hit}; + if($evalue < $options->{evalue} && $dice > $options->{dice}){ + if(defined $protein_position_quicklookup{$from} && defined $protein_position_quicklookup{$to}){ + my ($theta0,$radius0) = @{$protein_position_quicklookup{$from}}; + my ($theta1,$radius1) = @{$protein_position_quicklookup{$to}}; + # If this is a self-self link, disable plotting because we don't care. + # If ig_dist is disabled or distance is between them is less than our minimum + if($radius0 != $radius1 + && ($options->{'ig_dist'} == "-1" || abs($theta0-$theta1) <= $options->{'ig_dist'}) + ){ + # Create the dataset + my @row_data = ('compmap', + ); + # We work under the assumption that all hits + # are bi-directional, so we swap them to be + # smallest first no matter what. + if($radius1 < $radius0){ + my $tmp = $radius1; + $radius1 = $radius0; + $radius0 = $tmp; + # We also want to add in reverse order + push(@row_data, + (($theta0+1)*$full_increment), + (($theta1+1)*$full_increment), + ); + }else{ + push(@row_data, + (($theta1+1)*$full_increment), + (($theta0+1)*$full_increment), + ); + } + + # If it's a link with the same theta + # value, then we'll go ahead and hide + # behind the track to make it a little + # prettier. + my $zstr; + if($theta0 == $theta1){ + $zstr="z=0"; + }else{ + $zstr="z=100"; + } + + # Create the additional row data + push(@row_data, + join(',', "dice=$dice", "color=" . colorstr($dice)) + ); + $links{$radius0}{$radius1} .= join(' ', @row_data) . "\n"; + } + } + } + } + return \%links; +} +sub colorstr { + my ($dice) = @_; + if($dice > 90) { + return 'black'; + }else{ + return 'greys-9-seq-' . floor($dice / 10); + } +} +sub circosconf { + my $cc = CPT::Circos::Conf->new(); + $cc->include('etc/colors_fonts_patterns.conf'); + $cc->start_block('colors'); + $cc->set('accent-8-qual-inv-1', '42, 135, 42'); + $cc->set('accent-8-qual-inv-2', '111, 83, 150'); + $cc->set('accent-8-qual-inv-3', '182, 112, 46'); + $cc->set('accent-8-qual-inv-4', '178, 178, 53'); + $cc->set('accent-8-qual-inv-5', '13, 63, 128'); + $cc->set('accent-8-qual-inv-6', '183, 0, 96'); + $cc->set('accent-8-qual-inv-7', '116, 47, 0'); + $cc->set('accent-8-qual-inv-8', '45, 45, 45'); + $cc->end_block(); + # markings indicating position along genome + $cc->include('example/etc/ideogram.conf'); + #$cc->include('rules.conf'); + # Genome data + $cc->set('karyotype', 'karyotype.conf'); + # Default image params are fine + $cc->start_block('image'); + $cc->include('etc/image.conf'); + $cc->end_block(); + #$cc->include('highlights.conf'); + $cc->include('plots.conf'); + #$cc->include('rules.conf'); + + $cc->include('etc/housekeeping.conf'); + my $result = $cc->finalize(); + $cc = CPT::Circos::Conf->new(); + return $result; +} +sub karyotype { + my @karyotype_data = ( + "chr - compmap compmap 0 ".((scalar @aligned_results)*1000+500)." white" + ); + return join("\n", @karyotype_data); +} + +my @files = (); + +my $number_of_tracks = 0; +sub register_track { + my ($r0,$r1) = calculate_individual_track($number_of_tracks); + $number_of_tracks++; + return ($r0, $r1); +} +sub calculate_individual_track { + my ($idx) = @_; + my $r0 = ( 90 - (10 * $idx - 1)/1) / 100; + my $r1 = ( 90 - (10 * $idx - 9)/1) / 100; + return ($r0, $r1); +} +sub genome_data { + my $cc = CPT::Circos::Conf->new(); + # Map string back to position in array. + #$cc->set('z',10); + # Loop across all our protein data sets + $cc->start_block('plots'); + foreach my $genome(@user_ordering){ + # Add protein file + my $filename = sprintf('org.features.%s.txt', $genome); + push(@files, [ 'data/'.$filename, $compmap_proteins{$genome}]); + # Create associated tracks + + my ($r0,$r1) = register_track(); + $cc->start_block('plot'); + $cc->set('type','highlight'); + $cc->set('file', $filename); + $cc->set('r0', $r0 .'r'); + $cc->set('r1', $r1 .'r'); + $cc->set('z', '50'); + $cc->set('fill_color','accent-8-qual-' . ($uo_idx{$genome} + 1)); + $cc->set('stroke_thickness', '1'); + $cc->set('stroke_color', 'black'); + $cc->end_block(); + } + + + foreach my $from(@user_ordering){ + foreach my $to(@user_ordering){ + next if($from eq $to || $uo_idx{$from} > $uo_idx{$to}); + + if($linkages{$uo_idx{$from}}{$uo_idx{$to}}){ + my $filename = sprintf('links.%s.%s.txt', $from, $to); + push(@files, [ 'data/'.$filename, $linkages{$uo_idx{$from}}{$uo_idx{$to}}]); + #push(@files, [ 'data/'.$filename, 'blaaaaaaah']); + + my ($r0a, $r0b) = calculate_individual_track($uo_idx{$to}); + my ($r1a, $r1b) = calculate_individual_track($uo_idx{$from}); + + # If they're in this ordering, they will be pointing at + # the "outsides" of each genome/protein track, so we + # swap with the internal ones. + if($r1b > $r0a){ + $r0a = $r1a; + $r1b = $r0b; + } + + $cc->start_block('plot'); + $cc->set('type','connector'); + $cc->set('thickness', $options->{stroke_thickness}); + $cc->set('file', $filename); + if($r1b<$r0a){ + $cc->set('r0', $r1b .'r'); + $cc->set('r1', $r0a .'r'); + }else{ + $cc->set('r0', $r0a .'r'); + $cc->set('r1', $r1b .'r'); + } + $cc->set('connector_dims', '0,0.3,0.4,0.3,0'); + $cc->set('color','black'); + $cc->end_block(); + } + } + } + + + $cc->end_block(); + return $cc->finalize(); +} +sub rulesconf { + my ($self) = @_; + my $cc = CPT::Circos::Conf->new(); + $cc->start_block('rules'); + for(my $i = 0; $i < 10; $i++){ + $cc->start_block('rule'); + $cc->set('importance', 10 - $i); + $cc->set('condition', 'var(dice) > ' . (10*$i)); + if($i == 9){ + $cc->set('color', 'black'); + }else{ + $cc->set('color', 'gray' . (10 * ($i+1))); + } + #$cc->set('z',10-$i); + $cc->end_block(); + } + $cc->end_block(); +} + +push(@files, [ 'etc/karyotype.conf', karyotype() ]); +push(@files, [ 'etc/circos.conf', circosconf() ]); +#push(@files, [ 'etc/rules.conf', rulesconf() ]); +push(@files, [ 'etc/plots.conf', genome_data() ]); + + + +use Archive::Any::Create; +my $archive = Archive::Any::Create->new(); +$archive->container('conf'); +foreach(@files){ + my ($location, $content) = @{$_}; + $archive->add_file($location, $content); +} + +use CPT::OutputFiles; +my $crr_output = CPT::OutputFiles->new( + name => 'output_circos_confs', + GGO => $ggo, +); +$crr_output->CRR(data => $archive); + + +=head1 NAME + +PSM Plotter + +=head1 DESCRIPTION + +Following the execution of the PSM Prep tool, this tool plots a subset of those genomes as ciruclar tracks with protein-protein relationships plotted as links between the boxes representing proteins. + +=head2 IMPORTANT PARAMETERS + +=over 4 + +=item C + +This parameter controls the order in which genomes are aligned and then plotted. This MUST contain (comma/space) separated values listing the order (outside to in) in which you want your genomes to appear. In this field, type the name of each genome. The name can be found on the first line of the file under "LOCUS NC_00000001", where NC_00000001 would be the genome's name. + +=item C, C + +Adjusting these parameters will affect which links are plotted. Links are heatmapped into bins based on dice score as that is the easiest measure to work with, and scales nicely from 0 to 100. For example, a link with a dice score of 20-29 would be plotted as 20% black (grey20), whereas a dice score of 90+ would be plotted as solid black + +=item C, C, C + +These parameters control the Needleman-Wunsch Multiple Sequence Alignment library's scoring scheme. Mismatch scores are generally negative and discourage unrelated proteins from being plotted in a line together. Match scores encourage related proteins to line up. Gap penalty is set at zero as we generally prefer gaps to mismatches in this tool; phage genomes are small and gaps are "cheap" to use, whereas mismatches can sometimes give an incorrect impression of relatedness. That said, how your plots look is completely up to you and we encourage experimentation! + +=item C + +Every Nth gene in a genome will be plotted a slightly different color. + +=back + +=head2 Why Can't I Control Colors? + + Brewer colors compose Brewer palettes which have been manually defined by + Cynthia Brewer for their perceptual properties. + + http://circos.ca/tutorials/lessons/configuration/colors/ + +Color palette choice is very important to creating an attractive and easy to read graphic. In the words of Dr. Krzywinski, L. Humans selecting from an RGB/HSV color palette tend to make poor choices, so we've removed the option in lieu of using the very attractive L. Specifically, I've selected the 8 class qualtitative "Accent" color set, which has produced some very nice maps. If you would like the option of selecting amongst the other 8-class qualitative color sets, please file a bug report and let me know. + + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/cpt_psm_2_gentable.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/cpt_psm_2_gentable.pl Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,168 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Storable; +use CPT::GalaxyGetOpt; +use CPT::Bio::NW_MSA; +use Data::Dumper; +use CPT::Circos::Conf; +use POSIX; + + +my $ggo = CPT::GalaxyGetOpt->new(); +my $options = $ggo->getOptions( + 'options' => [ + [ 'file', 'PSM2 Data File', { validate => 'File/Input', required => 1 } ], + [], + ['Cutoffs'], + ['evalue' , 'Evalue cutoff' , { validate => 'Float' , default => 1e-4 } ] , + ['dice' , 'Dice cutoff' , { validate => 'Float' , default => 50 } ] , + [], + ['Alignment Options'], + ['mismatch' , 'Mismatch Score' , { validate => 'Float' , default => -1} ] , + ['gap_penalty' , 'Gap Penalty' , { validate => 'Float' , default => '0.0' } ] , + ['match' , 'Match Score' , { validate => 'Float' , default => 5 } ] , + ], + 'outputs' => [ + [ + 'diff_table', + 'Output Comparison Table', + { + validate => 'File/Output', + required => 1, + default => 'genome_comp', + data_format => 'text/tabular', + default_format => 'TSV_U', + }, + ], + [ + 'blastclust', + 'Output Blastclust Table', + { + validate => 'File/Output', + required => 1, + default => 'blastclust', + data_format => 'text/tabular', + default_format => 'TSV_U', + } + ], + ], + 'defaults' => [ + 'appid' => 'PSM.Comp', + 'appname' => 'PSM Comparison Table', + 'appdesc' => 'aligns and lists data from PSM Prep', + 'appvers' => '1.94', + ], + 'tests' => [ + ], +); + + +my %data_file = %{retrieve($options->{file})}; + +print STDERR "Aliging genomes\n"; +my $msa = CPT::Bio::NW_MSA->new( + gap_penalty => $options->{'gap_penalty'}, + match_score => $options->{'match'}, + mismatch_score => $options->{'mismatch'}, + bidi => 1, +); + +my @hits = @{$data_file{hit_table}}; +my @clusters; + +foreach my $hit(@hits){ + my ($from, $to, $evalue, $dice) = @{$hit}; + if($evalue < $options->{evalue} && $dice > $options->{dice}){ + if($options->{verbose}){ + print "$from $to\n"; + } + + my $foundmatch = 0; + foreach my $cluster(@clusters){ + if($from ~~ @{$cluster} || $to ~~ @{$cluster}){ + $foundmatch = 1; + if(!($from ~~ @{$cluster})){ + push(@{$cluster}, $from); + } + if(!($to ~~ @{$cluster})){ + push(@{$cluster}, $to); + } + } + } + if($foundmatch == 0){ + push(@clusters, ["".($#clusters+2), $from, $to]); + } + $msa->add_relationship($from, $to); + } +} +my @fixed_clusters; +foreach my $cluster (@clusters) { + my ($idx, @values) = @{$cluster}; + push(@fixed_clusters, [$idx, join(',', @values)]); +} + +my @user_ordering = keys($data_file{gbk}); + +foreach my $genome(@user_ordering){ + print STDERR "\tAligning $genome\n"; + my $gi_list_ref = $data_file{gbk}{$genome}{gi};#"GI" list + if(! defined $gi_list_ref){ + warn "Could not find $genome genome in the data file. Please be sure you have correctly specified the name of a genome from a genbank file. (See the LOCUS line for the name)."; + }else{ + $msa->align_list($gi_list_ref); + } +} + +my @aligned_results = $msa->merged_array(); +# Remove CRC64 hashes from sequences +foreach my $row(@aligned_results){ + $row = [map { s/_[A-F0-9]{16}$//; $_ } @{$row}]; + #my $key = ${$row}[0]; + #foreach my $cluster(@clusters){ + + #} +} + +my %table = ( + 'Sheet1' => { + header => \@user_ordering, + data => \@aligned_results, + } +); + + +use CPT::OutputFiles; +my $crr_output = CPT::OutputFiles->new( + name => 'diff_table', + GGO => $ggo, +); +$crr_output->CRR(data => \%table); + +my %table2 = ( + 'Sheet1' => { + header => ['Cluster ID', 'Contents'], + data => \@fixed_clusters, + } +); +my $crr_output2 = CPT::OutputFiles->new( + name => 'blastclust', + GGO => $ggo, +); +$crr_output2->CRR(data => \%table2); + +=head1 DESCRIPTION + +Following the execution of the PSM Prep tool, this tool simply aligns the genomes and generates a table comparison the positions of all proteins. It can be very useful to figure out which genes are missing in which genomes. + +=head2 IMPORTANT PARAMETERS + +=over 4 + +=item C, C, C + +These parameters control the Needleman-Wunsch Multiple Sequence Alignment library's scoring scheme. Mismatch scores are generally negative and discourage unrelated proteins from being plotted in a line together. Match scores encourage related proteins to line up. Gap penalty is set at zero as we generally prefer gaps to mismatches in this tool; phage genomes are small and gaps are "cheap" to use, whereas mismatches can sometimes give an incorrect impression of relatedness. That said, how your plots look is completely up to you and we encourage experimentation! + +=back + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/cpt_psm_2_gentable.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/cpt_psm_2_gentable.xml Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,92 @@ + + + aligns and lists data from PSM Prep + perl cpt_psm_2_gentable.pl --version + + + + + perl + perl-bioperl + perl-moose + perl-ipc-run + perl-getopt-long-descriptive + + + + + + + + + + + + + + + + + DESCRIPTION +=========== + +Following the execution of the PSM Prep tool, this tool simply aligns +the genomes and generates a table comparison the positions of all +proteins. It can be very useful to figure out which genes are missing in +which genomes. + +IMPORTANT PARAMETERS +-------------------- + +- ``mismatch``, ``gap_penalty``, ``match`` + + These parameters control the Needleman-Wunsch Multiple Sequence + Alignment library's scoring scheme. Mismatch scores are generally + negative and discourage unrelated proteins from being plotted in a + line together. Match scores encourage related proteins to line up. + Gap penalty is set at zero as we generally prefer gaps to mismatches + in this tool; phage genomes are small and gaps are "cheap" to use, + whereas mismatches can sometimes give an incorrect impression of + relatedness. That said, how your plots look is completely up to you + and we encourage experimentation! + + + + + diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,37 @@ +package CPT; +use strict; +use warnings; +use Moose; + +# ABSTRACT: main library wrapping most actions needed by CPT's portal scripts. + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT - main library wrapping most actions needed by CPT's portal scripts. + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Analysis/PAUSE.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Analysis/PAUSE.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,197 @@ +package CPT::Analysis::PAUSE; + +# ABSTRACT: Library for use in PAUSE analysis +use strict; +use warnings; +use Moose; +use List::Util qw(sum); +use Statistics::Descriptive; + +sub max ($$) { shift; $_[ $_[0] < $_[1] ] } +sub min ($$) { shift; $_[ $_[0] > $_[1] ] } + +sub derivative { + my ( $self, $data_ref ) = @_; + my @data = @{$data_ref}; + my @new_data; + foreach ( my $i = 0 ; $i < scalar(@data) - 1 ; $i++ ) { + $new_data[ $i + 1 ] = $data[ $i + 1 ] - $data[$i]; + } + return \@new_data; +} + +sub find_peaks { + my ( $self, %data ) = @_; + use IPC::Run3; + use File::Temp qw/tempfile/; + + # Store to CSV File + my @starts = @{ $data{data} }; + my ( $fh0, $filename0 ) = tempfile('galaxy.pause.XXXXXXX'); + printf $fh0 ( "%s,%s\n", 'position', 'count' ); + for ( my $i = 0 ; $i < scalar(@starts) ; $i++ ) { + printf $fh0 "%d,%d\n", $i, + ( defined $starts[$i] ? $starts[$i] : 0 ); + } + close($fh0); + + my ( $fh, $filename ) = tempfile('galaxy.pause.XXXXXXX'); + my @cmd = ( + 'Rscript', $data{location_of_rscript_file}, + $filename0, $filename, $data{snr} + ); + my ( $in, $out, $err ); + run3 \@cmd, \$in, \$out, \$err; + + # Read in R data + my @values; + while (<$fh>) { + chomp; + push( @values, $_ ); + } + close($fh); + + unlink($filename0); + unlink($filename); + + return @values; +} + +sub smooth { + my ( $self, $data_ref ) = @_; + my @data = @{$data_ref}; + my @new_data; + my $length = scalar @data; + foreach ( my $i = 0 ; $i < $length ; $i++ ) { + my $avg = + sum( @data[ $i - 20 .. $i - 1, $i + 1 .. $i + 20 ] ) / 40; + $new_data[$i] = $avg; + } + return \@new_data; +} + +sub histogram { + my ( $self, %data ) = @_; + + my @coverage = @{ $data{data} }; + my @return_coverage; + for ( my $i = 0 ; $i < scalar(@coverage) ; $i++ ) { + my $size = $coverage[$i]; + unless ($size) { $size = 0 } + $return_coverage[$i] = [ $i, $size, "*" x $size ]; + } + my %results = ( + 'Sheet1' => { + headers => [qw(Base Count Plot)], + data => \@return_coverage, + } + ); + return %results; +} + +sub getCoverageDensity { + my ( $self, %data ) = @_; + + # Load the sam file + my $sam = Bio::DB::Sam->new( + -bam => $data{bam}, + -fasta => $data{genome}, + -autoindex => 1, + ); + + # Get all alignments to our indicated FASTA file + my @alignments = $sam->get_features_by_location( + -seq_id => $data{fasta_id}, + -start => 1, + -end => $data{fasta_length} + ); + + # Set up some variables + my $coverage_density_max_value = 0; + my ( @coverage_density, @read_starts, @read_ends ); + + # including some for statistics + my $stat_start = Statistics::Descriptive::Sparse->new(); + my $stat_end = Statistics::Descriptive::Sparse->new(); + + # Looping over alignments + for my $a (@alignments) { + my $start = $a->start; + my $end = $a->end; + + # Increment the number of reads starting there + $read_starts[$start]++; + $read_ends[$end]++; + + # And increment the coverage density + foreach ( $start .. $end ) { + $coverage_density[$_]++; + if ( $coverage_density[$_] > + $coverage_density_max_value ) + { + $coverage_density_max_value = + $coverage_density[$_]; + } + } + } + my @start_data_for_stats; + my @end_data_for_stats; + for ( my $i = 0 ; $i < $data{fasta_length} ; $i++ ) { + if ( $read_starts[$i] ) { + push( @start_data_for_stats, $read_starts[$i] ); + } + if ( $read_ends[$i] ) { + push( @end_data_for_stats, $read_ends[$i] ); + } + } + $stat_start->add_data(@start_data_for_stats); + $stat_end->add_data(@end_data_for_stats); + + # Lots of data to return + use CPT::Analysis::PAUSE::ParsedSam; + my $psam = CPT::Analysis::PAUSE::ParsedSam->new( + coverage_density => \@coverage_density, + read_starts => \@read_starts, + read_ends => \@read_ends, + max => $coverage_density_max_value, + stats_start_max => $stat_start->max(), + stats_end_max => $stat_end->max(), + stats_start_mean => $stat_start->mean(), + stats_end_mean => $stat_end->mean(), + stats_start_standard_deviation => + $stat_start->standard_deviation(), + stats_end_standard_deviation => $stat_end->standard_deviation(), + ); + return $psam; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Analysis::PAUSE - Library for use in PAUSE analysis + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Analysis/PAUSE/ParsedSam.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Analysis/PAUSE/ParsedSam.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,49 @@ +package CPT::Analysis::PAUSE::ParsedSam; + +# ABSTRACT: Library for use in PAUSE analysis +use strict; +use warnings; +use Moose; +use SVG; + +has 'coverage_density' => ( is => 'rw', isa => 'ArrayRef' ); +has 'read_starts' => ( is => 'rw', isa => 'ArrayRef' ); +has 'read_ends' => ( is => 'rw', isa => 'ArrayRef' ); +has 'max' => ( is => 'rw', isa => 'Int' ); +has 'stats_start_max' => ( is => 'rw', isa => 'Num' ); +has 'stats_end_max' => ( is => 'rw', isa => 'Num' ); +has 'stats_start_mean' => ( is => 'rw', isa => 'Num' ); +has 'stats_end_mean' => ( is => 'rw', isa => 'Num' ); +has 'stats_start_standard_deviation' => ( is => 'rw', isa => 'Num' ); +has 'stats_end_standard_deviation' => ( is => 'rw', isa => 'Num' ); + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Analysis::PAUSE::ParsedSam - Library for use in PAUSE analysis + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Analysis/PAUSE/SVG.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Analysis/PAUSE/SVG.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,445 @@ +package CPT::Analysis::PAUSE::SVG; + +# ABSTRACT: Library for use in PAUSE analysis +use strict; +use warnings; +use Moose; +use Data::Dumper; +use List::MoreUtils qw(each_array); +use SVG; + +has 'svg' => ( is => 'rw' ); +has 'width' => ( is => 'rw', isa => 'Int' ); +has 'height' => ( is => 'rw', isa => 'Int' ); +has 'vertical_offset' => ( is => 'rw', isa => 'Int' ); +has 'start_end_max_num' => ( is => 'rw', isa => 'Int' ); +has 'num_rows' => ( is => 'rw', isa => 'Int' ); +has 'row_size' => ( is => 'rw', isa => 'Int' ); +has 'row_width' => ( is => 'rw', isa => 'Int' ); +has 'x_border' => ( is => 'rw', isa => 'Int' ); +has 'y_border' => ( is => 'rw', isa => 'Int' ); +has 'line_height' => ( is => 'rw', isa => 'Int' ); +has 'inter_line_spacing' => ( is => 'rw', isa => 'Int' ); +has 'max' => ( is => 'rw', isa => 'Int' ); +has 'fasta_id' => ( is => 'rw', isa => 'Any' ); + +sub setup { + my ($self) = @_; + $self->svg( + SVG->new( + width => $self->width(), + height => $self->height(), + ) + ); +} + +sub add_header { + my ( $self, @refs ) = @_; + + $self->plot_title( 'Plot of ' . $self->fasta_id() ); + + my $i = 0; + foreach (@refs) { + my @subrefs = @{$_}; + foreach (@subrefs) { + $i++; + my %d = %{$_}; + $self->plot_key( $d{name}, $d{line}, $d{fill}, $i ); + } + } + $self->vertical_offset( $self->vertical_offset() - ( $i - 1 ) * 20 ); +} + +my $global_pline_idx = 0; + +sub plot_track { + my ( $self, $points_ref, $stroke, $fill, $id ) = @_; + + $global_pline_idx++; + $self->svg()->polyline( + %{$points_ref}, + id => 'pline_' . $id . '-' . $global_pline_idx, + style => { + 'fill-opacity' => .5, + 'stroke' => $stroke, + 'fill' => $fill, + } + ) + +} + +sub make_scale { + my ( $self, $i, $start, $stop ) = @_; + + # Left axis label, must be rotated + my $tmp_x = $self->fix_x_value(-30); #$self->x_border()-30; + my $tmp_y = $self->fix_y_value($i) + 50; + $self->svg()->text( + id => 'left_side_label_row_' . $i, + x => $tmp_x - 20, + y => $tmp_y - 20, + 'font-family' => 'Helvetica, sans-serif', + 'transform' => sprintf( 'rotate(-90 %s %s)', $tmp_x, $tmp_y ), + )->cdata('Start/End Hit Count Scale'); + + # Right axis label, must be rotated + $tmp_x = $self->fix_x_value( $self->row_width() + 60 ); + $tmp_y = $self->fix_y_value($i); + $self->svg()->text( + id => 'right_side_label_row_' . $i, + x => $tmp_x - 20, + y => $tmp_y, + 'font-family' => 'Helvetica, sans-serif', + 'transform' => sprintf( 'rotate(-90 %s %s)', $tmp_x, $tmp_y ), + )->cdata('Coverage Density'); + + # Horizontal increments + for ( my $k = -4 ; $k <= 4 ; $k++ ) { + + # Left side label + my $y_position = $k / 4 * $self->line_height(); + $self->svg()->text( + id => sprintf( 'label_left_side_row_%s_%s', $i, $k ), + x => $self->fix_x_value(-30), + y => $self->fix_y_value( $i, $y_position ), + 'font-family' => 'Helvetica, sans-serif', + )->cdata( int( $self->start_end_max_num() * abs( $k / 4 ) ) ); + + # Right side label + $self->svg()->text( + id => sprintf( 'label_right_side_row_%s_%s', $i, $k ), + x => $self->fix_x_value( $self->row_width() + 10 ), + y => $self->fix_y_value( $i, $y_position ), + 'font-family' => 'Helvetica, sans-serif', + )->cdata( int( $self->max() * abs( $k / 4 ) ) ); + + # Vertical lines + $self->svg()->line( + x1 => $self->fix_x_value( $self->row_width() ), + x2 => $self->fix_x_value(0), + y1 => $self->fix_y_value( $i, $y_position ), + y2 => $self->fix_y_value( $i, $y_position ), + id => sprintf( 'vertical_increment_row_%s_%s', $i, $k ), + opacity => .25, + stroke => 'rgb(0,0,0)', + 'stroke-width' => '2', + ); + } + + # Vertical Increments + my $number_of_increments = 10; + for ( + my $k = 0 ; + $k <= $self->row_width() ; + $k += ( $self->row_width() / $number_of_increments ) + ) + { +# We get % of way across (k/num_inc) and we multiply by the width value, to get % of width which we adjust with start to get correct value + my $b = + ( $k / $self->row_width() ) * ( $stop - $start ) + $start; + my $kb = $b / 1000; + $self->svg()->text( + id => + sprintf( 'vertical_line_label_row_%s_%s', $i, $k ), + x => $self->fix_x_value($k), + y => + $self->fix_y_value( $i, $self->line_height() + 20 ), + 'font-family' => 'Helvetica, sans-serif', + )->cdata( ($kb) . ' kb' ); + + $self->svg()->line( + x1 => $self->fix_x_value($k), + x2 => $self->fix_x_value($k), + y1 => $self->fix_y_value( $i, -$self->line_height() ), + y2 => $self->fix_y_value( $i, $self->line_height() ), + id => sprintf( 'vertical_line_row_%s_%s', $i, $k ), + opacity => .5, + stroke => 'rgb(0,0,0)', + 'stroke-width' => '1', + ); + } +} + +sub plot_title { + my ( $self, $string ) = @_; + $self->svg()->text( + id => 'label_plot_title', + x => $self->x_border(), + y => 50 + $self->vertical_offset(), + 'font-family' => 'Helvetica, sans-serif', + 'font-size' => '150%', + )->cdata($string); + $self->vertical_offset( $self->vertical_offset() + 25 ); +} + +sub plot_key { + my ( $self, $text, $stroke, $colour, $i ) = @_; + + $self->svg()->rectangle( + x => $self->x_border(), + y => 50 + $self->vertical_offset() - 15, + width => 15, + height => 15, + id => 'label_key_example' . $i, + 'fill-opacity' => .5, + 'stroke' => $stroke, + 'fill' => $colour, + ); + $self->svg()->text( + id => 'label_key_string' . $i, + x => $self->x_border() + 20, + y => 50 + $self->vertical_offset(), + 'font-family' => 'Helvetica, sans-serif', + )->cdata($text); + $self->vertical_offset( $self->vertical_offset + 20 ); +} + +sub xmlify { + my ($self) = @_; + return $self->svg()->xmlify(); +} + +sub x_values_for_range_scaled { + my ( $self, $start, $end, $pieces ) = @_; + my @vals = ($start); + my $by = ( $end - $start ) / $pieces; + ## For all values from the x_border to xborder+row_width, add a value of row_width split/row_size (i.e., how far for EACH INDIVIDUAL value) + for ( my $i = $start ; $i < $end ; $i += $by ) { + push( @vals, $i ); + } + push( @vals, $end ); + return @vals; +} + +sub fix_x_values { + my ( $self, @values ) = @_; + return map { $self->fix_x_value($_) } @values; +} + +sub fix_x_value { + my ( $self, $val ) = @_; + return $val + $self->x_border(); +} + +sub fix_y_value { + my ( $self, $i, $val ) = @_; + return ( + $self->vertical_offset() + $val - $self->line_height() + ( + ( ( 2 + $i ) * $self->line_height() ) + + ( $i * $self->inter_line_spacing() ) + + $self->y_border() + ) + ); +} + +sub fix_all_y_values { + my ( $self, $i, @arrays_to_fix, ) = @_; + for ( my $j = 0 ; $j < scalar @arrays_to_fix ; $j++ ) { + + # For each array in postive_y (AoA) + # + # we cast to array, then we map this, then we have this + # in an anonymous array which means we can just + # replace. This is probably not as efficient as looking + # at every value directly and doing "in place" + # replacement, but I don't know how that would be + # written here... + $arrays_to_fix[$j] = + [ map { $self->fix_y_value( $i, $_ ) } + @{ $arrays_to_fix[$j] } ]; + } + return @arrays_to_fix; +} + +sub copy_data { + my ( $self, $start, $stop, $data_to_ref, $data_from_ref, $max ) = @_; + + # Copy data from the original array to the new one, transforming out + # the subset of interest. This is done across an AoA + my @data_to = @{$data_to_ref}; + my @data_from = @{$data_from_ref}; + for ( my $k = 0 ; $k < scalar @data_from ; $k++ ) { + foreach ( my $j = $start ; $j < $stop ; $j++ ) + { #1 to 10_000 in the genome + if ( defined ${ $data_from[$k] }[$j] ) { + push( + @{ $data_to[$k] }, + -( + $self->line_height() * + ${ $data_from[$k] }[$j] / + $max + ) + ); + } + else { + push( @{ $data_to[$k] }, 0 ); + } + } + } + return @data_to; +} + +sub plot_individual_row { + my ( $self, $start, $stop, $i, $regular_ref, $rescale_ref ) = @_; + + my @regular = map { ${$_}{data} } @{$regular_ref}; + my @rescale = map { ${$_}{data} } @{$rescale_ref}; + my @regular_y; + my @rescale_y; + ## Ensure we duplicate the number of arrays. + foreach (@regular) { + push( @regular_y, [] ); + } + foreach (@rescale) { + push( @rescale_y, [] ); + } + + # Determine bounds of row + $self->push_all( \@regular_y, 0 ); + $self->push_all( \@rescale_y, 0 ); + + @regular_y = + $self->copy_data( $start, $stop, \@regular_y, \@regular, + $self->start_end_max_num() ); + @rescale_y = + $self->copy_data( $start, $stop, \@rescale_y, \@rescale, + $self->max() ); + + #print @rescale_y; + + # Set up our X values + my @x_values = $self->fix_x_values( + $self->x_values_for_range_scaled( + 0, $self->row_width(), ( $stop - $start ) + ) + ); + +#my @x_values_rescale = $self->fix_x_values( $self->x_values_for_range_scaled(0, $self->row_width(), ( $stop - $start ))); + + $self->push_all( \@regular_y, 0 ); + $self->push_all( \@rescale_y, 0 ); + + # Fix the ys + @regular_y = $self->fix_all_y_values( $i, @regular_y ); + @rescale_y = $self->fix_all_y_values( $i, @rescale_y ); + + # Prepare our styling + my @regular_line = map { ${$_}{line} } @{$regular_ref}; + my @rescale_line = map { ${$_}{line} } @{$rescale_ref}; + my @regular_fill = map { ${$_}{fill} } @{$regular_ref}; + my @rescale_fill = map { ${$_}{fill} } @{$rescale_ref}; + + # Add data to plot + $self->svg_add_track( + \@x_values, \@regular_y, \@regular_line, + \@regular_fill, "$i-$start-$stop" + ); + $self->svg_add_track( + \@x_values, \@rescale_y, \@rescale_line, + \@rescale_fill, "$i-$start-$stop" + ); + + # scale + $self->make_scale( $i, $start, $stop ); +} + +sub debug { + my ( $self, $title, @arrs ) = @_; + print "=" x 16 . "\n"; + foreach (@arrs) { + my @arr = @{$_}; + printf "Array %s : %s\n", $title, scalar @arr; + print "\t" + . join( ',', + map { sprintf( '%-10d', int($_) ) } @arr[ 0 .. 10 ] ) + . "\n"; + my $a = scalar(@arr) - 11; + my $b = scalar(@arr) - 1; + print "\t" + . join( ',', + map { sprintf( '%-10d', int($_) ) } @arr[ $a .. $b ] ) + . "\n"; + } +} + +sub svg_add_track { + my ( + $self, $x_values_ref, $data_ref, + $line_ref, $fill_ref, $base_track_id + ) = @_; + my @x_values = @{$x_values_ref}; + my @data = @{$data_ref}; + my @lines = @{$line_ref}; + my @fills = @{$fill_ref}; + + my $it = each_array( @data, @lines, @fills ); + while ( my ( $pry, $prl, $prf ) = $it->() ) { + my $plot_data = $self->svg()->get_path( + x => \@x_values, + y => $pry, + -type => 'polyline', + -closed => 'false' #specify that the polyline is closed. + ); + $self->plot_track( $plot_data, $prl, $prf, "$base_track_id" ); + } +} + +sub plot_data { + my ( $self, %d ) = @_; + $self->add_header( $d{regular}, $d{rescale} ); + + ##loop through rows + foreach ( my $i = 0 ; $i < $self->num_rows() ; $i++ ) { + my ( $start, $stop ) = + ( $i * $self->row_size(), ( $i + 1 ) * $self->row_size() ); + $self->plot_individual_row( $start, $stop, $i, $d{regular}, + $d{rescale}, ); + } +} + +sub plot_data_subset { + my ( $self, %d ) = @_; + + $self->add_header( $d{regular}, $d{rescale} ); + ##loop through rows + $self->plot_individual_row( $d{from}, $d{to}, 0, $d{regular}, + $d{rescale}, ); +} + +sub push_all { + my ( $self, $array_ref, @values ) = @_; + foreach ( @{$array_ref} ) { + push( @{$_}, @values ); + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Analysis::PAUSE::SVG - Library for use in PAUSE analysis + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Analysis/TerL.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Analysis/TerL.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,517 @@ +package CPT::Analysis::TerL; + +# ABSTRACT: Guess phage packaging strategy based on homology to terminases (TerL) of phages with known packaging strategies + +use strict; +use warnings; +use Data::Dumper; +use autodie; +use Moose; +use Bio::SearchIO; +use File::ShareDir; +use File::Spec; + +has 'hmmer_evalue_cutoff' => ( is => 'rw', isa => 'Int' ); +has 'blast_evalue_cutoff' => ( is => 'rw', isa => 'Int' ); +has 'blast_dice_cutoff' => ( is => 'rw', isa => 'Int' ); + +has 'search_hmmer' => ( is => 'rw', isa => 'Bool' ); +has 'search_blast' => ( is => 'rw', isa => 'Bool' ); + +has 'data_dir' => ( is => 'rw', isa => 'Any' ); + +has 'awk_string' => ( + is => 'ro', + isa => 'Str', + default => +'BEGIN{print "#row,query id,subject id,evalue,dice" } {qid=$1;sid=$2;percent_identical=$3;query_length=$4;subject_length=$5;evalue=$6;dice=(2*percent_identical*subject_length/ ( subject_length + query_length ) );printf("%d,%s,%s,%s,%s\n",FNR, qid, sid, dice, evalue);}' +); + +has 'input_file' => ( is => 'rw', isa => 'Str' ); + +my @column_max = ( 20, 10, 20, 10, 20, 60 ); +my %hits; + +sub run { + my ( $self, %data ) = @_; + + #$self->prepare(%data); + my $dir = '/galaxy/tool-data/' # File::ShareDir::dist_dir('CPT-Analysis-TerL'); + $self->data_dir($dir); + $self->input_file( $data{input_file} ); + + if ( $self->search_hmmer() ) { + $self->hmmer(); + } + if ( $self->search_blast() ) { + $self->blast($self); + } + + return $self->guess(); +} + +sub hmmer { + my ($self) = @_; + + use Term::ProgressBar 2.00; + my $progress = Term::ProgressBar->new( + { + name => 'HMMER', + count => 100, + ETA => 'linear', + } + ); + $progress->max_update_rate(1); + + my $hmmer_db_dir = + File::Spec->catdir( $self->data_dir(), 'db', 'hmmer' ); + my @hmmer_dbs = glob( File::Spec->catfile( $hmmer_db_dir, "*.hmm" ) ); + + my $i = 0; + foreach (@hmmer_dbs) { + $progress->update( 100 * ( $i++ ) / scalar @hmmer_dbs ); + my $db_name = substr( $_, rindex( $_, '/' ) + 1, -4 ); + my $output_filename = sprintf( + '%s.%s.out', + substr( + $self->input_file(), 0, + rindex( $self->input_file(), '.' ) + ), + $db_name + ); + my $query = sprintf( 'hmmsearch %s %s > %s', + $_, $self->input_file(), $output_filename ); + system($query); + + my $in = Bio::SearchIO->new( + -format => 'hmmer', + -file => $output_filename + ); + while ( my $result = $in->next_result ) { + while ( my $hit = $result->next_hit ) { + while ( my $hsp = $hit->next_hsp ) { + my ( $from, $to ) = + ( $result->query_name, $hit->name ); + unless ( $hits{$to}{$from} ) { + $hits{$to}{$from} = []; + } + push( + $hits{$to}{$from}, + { + 'type' => 'hmmer', + 'data' => { + 'evalue' => ( + $hsp + ->evalue + eq '0' + ? '0.0' + : $hsp + ->evalue + ), + } + } + ); + } + } + } + unlink($output_filename); + } + $progress->update(100); +} + +sub blast { + my ($self) = @_; + + use Term::ProgressBar 2.00; + my $progress = Term::ProgressBar->new( + { + name => 'Blast', + count => 100, + ETA => 'linear', + } + ); + $progress->max_update_rate(1); + + my $blast_db_dir = + File::Spec->catdir( $self->data_dir(), 'db', 'blast' ); + my @blast_dbs = + map { substr( $_, 0, -4 ) } + glob( File::Spec->catfile( $blast_db_dir, "*.phr" ) ); + + my $i = 0; + + #my %hits; + + foreach my $blast_db (@blast_dbs) { + $progress->update( 100 * ( $i++ ) / scalar(@blast_dbs) ); + my $output_str = + substr( $blast_db, rindex( $blast_db, '/' ) + 1 ) . '.csv'; + my $query = sprintf( +'psiblast -query %s -db %s -evalue %s -outfmt "6 qseqid sseqid pident qlen slen evalue" | awk \'%s\' > %s', + $self->input_file(), $blast_db, '1e-5', + $self->awk_string(), $output_str ); + system($query); + open( my $tmpfh, '<', $output_str ); + while (<$tmpfh>) { + chomp $_; + if ( $_ !~ /^#/ ) { + my @line = split( /,/, $_ ); + unless ( $hits{ $line[1] }{ $line[2] } ) { + $hits{ $line[1] }{ $line[2] } = []; + } + push( + $hits{ $line[1] }{ $line[2] }, + { + 'type' => 'psiblast', + 'data' => { + 'evalue' => $line[4], + 'dice' => $line[3], + } + } + ); + } + } + close($tmpfh); + unlink($output_str); + + } + $progress->update(100); +} + +sub guess { + my ($self) = @_; + + open( my $groupings_fh, + '<', + File::Spec->catfile( $self->data_dir(), 'groupings.tsv' ) ); + + # Load groupings.tsv into memory + my %data; + while (<$groupings_fh>) { + if ( $_ !~ /^#/ ) { + chomp $_; + my ( $major, $minor, $hit ) = split( /\t/, $_ ); + unless ( $data{$major}{$minor} ) { + $data{$major}{$minor} = []; + } + push( $data{$major}{$minor}, $hit ); + } + } + + # Create a reverse lookup table + my %rdata; + foreach my $i ( keys %data ) { + foreach my $j ( keys %{ $data{$i} } ) { + foreach my $k ( @{ $data{$i}{$j} } ) { + if ( defined($k) ) { + $rdata{$k} = [ $i, $j ]; + } + else { + print "$i $j\n"; + } + } + } + } + + # Table printing stuff + my @header = ( + 'Major Category', + 'Major hits', + 'Minor Category', + 'Minor hits', + 'Analysis', + 'Evidence Type', + 'Evidence' + ); + my %output; + + # Loop across the input keys + foreach my $input_key ( keys %hits ) { + my %guesses; + my %guess_evidence; + + # And across all of the hits that the query hit to + foreach my $against ( keys %{ $hits{$input_key} } ) { + + # We look at the evidence + my @evidence = @{ $hits{$input_key}{$against} }; + + my ( $type_major, $type_minor ); + if ( $rdata{$against} ) { + ( $type_major, $type_minor ) = + @{ $rdata{$against} }; + } + else { + ( $type_major, $type_minor ) = ( + substr( + $against, 0, + rindex( $against, '_' ) + ), + substr( + $against, + rindex( $against, '_' ) + 1 + ) + ); + } + + # Prepare hashes. + unless ( $guesses{$type_major} ) { + $guesses{$type_major} = (); + } + unless ( $guesses{$type_major}{$type_minor} ) { + $guesses{$type_major}{$type_minor} = (); + } + + # Loop across the evidence + foreach my $piece_of_evidence (@evidence) { + + # Here is an example piece of evidence + # 'GK_Gilmour_Gene43' => { + # 'SP18' => [ + # { + # 'data' => { + # 'evalue' => '2e-08', + # 'dice' => '23.8627' + # }, + # 'type' => 'psiblast' + # } + # ], + # + if ( $type_major !~ /subject i/ ) { + my %piece = %{$piece_of_evidence}; + if ( + $self->validate_evidence( + $piece_of_evidence) > 0 + ) + { + $guess_evidence{$type_major}++; + $guess_evidence{ + "$type_major$type_minor" + }++; + + # If it's not defined, set up sub arrays. + unless ( $guesses{$type_major} + {$type_minor} + { $piece{'type'} } ) + { + if ( $piece{'type'} + eq 'psiblast' ) + { + $guesses{ + $type_major + }{$type_minor} + { + $piece{ +'type' + } + } + = { + 'evalue' + => [], + 'dice' + => [] + }; + } + else { + $guesses{ + $type_major + }{$type_minor} + { + $piece{ +'type' + } + } + = { 'evalue' + => [] + }; + } + } + + # If it's zero, correct to zero. + if ( $piece{'data'}{'evalue'} + eq '0.0' ) + { + $piece{'data'}{'evalue'} + = '0.0'; + } + + # Add our evalue + push( + $guesses{$type_major} + {$type_minor} + { $piece{'type'} } + {'evalue'}, + $piece{'data'}{'evalue'} + ); + + # And if psiblast, add dice + if ( $piece{'type'} eq + 'psiblast' ) + { + push( + $guesses{ + $type_major + }{$type_minor} + { + $piece{ +'type' + } + }{'dice'}, + $piece{'data'} + {'dice'} + ); + } + } + } + } + } + + my @output_sheet; + + foreach my $major ( keys %guesses ) { + if ( $guess_evidence{$major} ) { + foreach my $minor ( keys %{ $guesses{$major} } ) + { + if ( $guess_evidence{"$major$minor"} ) { + foreach my $evidence_category ( + keys %{ + $guesses{$major} + {$minor} + } + ) + { + if ( $evidence_category + ne 'evidence' ) + { + # things like evalue, dice + my %hits = %{ + $guesses{ + $major + }{ + $minor + }{ + $evidence_category + } + }; + foreach + my $subtype ( + keys + %hits ) + { # should be evalue, dice + push( + @output_sheet, + [ + $major, + $guess_evidence{ + $major + } + , + $minor, + $guess_evidence{ +"$major$minor" + } + , + $evidence_category, + $subtype, + join + ( +',', + @{ + $hits{ + $subtype + } + } + ) + ] + ); + } + } + } + } + } + } + } + + if ( !scalar @output_sheet ) { + @output_sheet = ( ['No evidence above threshold'], ); + } + $output{$input_key} = { + header => \@header, + data => \@output_sheet, + }; + } + return \%output; +} + +sub validate_evidence { + my ( $self, $piece_of_evidence ) = @_; + my %piece = %{$piece_of_evidence}; + + #my ($self, $type, $subtype, $value) = @_; + # { + # 'data' => { + # 'evalue' => '2e-08', + # 'dice' => '23.8627' + # }, + # 'type' => 'psiblast' + # } + if ( $piece{type} eq 'hmmer' ) { + my $value = $piece{data}{evalue}; + if ( $value eq '0.0' || $value eq '0' ) { + return 1; + } + elsif ( !defined($value) || $value eq '' ) { + return 0; + } + else { + return ( log($value) < $self->hmmer_evalue_cutoff() ) + ; # -64 + } + } + elsif ( $piece{type} eq 'psiblast' ) { + my $evalue = $piece{data}{evalue}; + my $dice = $piece{data}{dice}; + + my $evalue_return = 0; + if ( $evalue eq '0.0' || $evalue eq '0' ) { + $evalue_return = 1; + } + else { + $evalue_return = + ( log($evalue) < $self->blast_evalue_cutoff() ); #-140 + } + + return $evalue_return + && ( $dice > $self->blast_dice_cutoff() ); # 30 + } +} + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Analysis::TerL - Guess phage packaging strategy based on homology to terminases (TerL) of phages with known packaging strategies + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Auth.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Auth.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,84 @@ +package CPT::Auth; +use strict; +use warnings; +use autodie; +use Moose; + +has DN => ( + is => 'rw', + isa => 'Str', +); + + +sub check_credentials { + my ( $self, %params ) = @_; + use Net::LDAPS; + print STDERR "Connecting to LDAP\n"; + my $base = 'dc=tamu,dc=edu'; + my $ldap = Net::LDAPS->new('00-ldap-biobio.tamu.edu') or die "$@"; + my $mesg = $ldap->bind; # an anonymous bind + + my $username = $params{'username'}; + my $password = $params{'password'}; + + $mesg = $ldap->search( # perform a search + base => $base, + filter => "uid=$username", + ); + my $max = $mesg->count; + + # Should we exit early? + for ( my $i = 0 ; $i < $max ; $i++ ) { + my $entry = $mesg->entry($i); + $self->DN() = $entry->dn(); + } + $mesg = $ldap->bind( $self->DN(), password => $password ); + if ( $mesg->error() eq 'Success' ) { + return 1; + + #print "Succesfully logged you in"; + } + else { + return 0; + + #print "Error: "; + #print $mesg->error(); + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Auth + +=head1 VERSION + +version 1.99.4 + +=head2 check_credentials + + $cptauth->check_credentials(username=>'J.doe',password=>$password); + +return 1 or 0, based on success or failure, respectively. + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,337 @@ +package CPT::Bio; +use Moose; +use strict; +use warnings; +use autodie; +use CPT::FiletypeDetector; +use CPT::BioData; +my $bd = CPT::BioData->new(); + +my $filetype = CPT::FiletypeDetector->new(); + +has 'var_translate' => ( is => 'rw', isa => 'Bool'); +has 'var_header' => ( is => 'rw', isa => 'Bool'); +has codonTable => ( + is => 'rw', + isa => 'Any', + default => sub { + $bd->getTranslationTable(11) + }, +); + +sub set_codon_table { + my ($self, $num) = @_; + $self->codonTable($bd->getTranslationTable($num)); +} + + +sub _getFeatureTag { + my ( $self, $feat, $tag ) = @_; + if(! defined($feat)){ + warn "Undefined feature"; + } + return $feat->has_tag($tag) + ? ( join( ',', $feat->get_tag_values($tag) ) ) + : ''; +} + +sub _getIdentifier { + my ( $self, $feat ) = @_; + my $line; + if ( ref $feat eq 'Bio::Seq::RichSeq' || ref $feat eq 'Bio::Seq' ) { + return $feat->display_id; + } + else { + my $locus_tag = $self->_getFeatureTag( $feat, 'locus_tag' ); + if ($locus_tag) { + return $locus_tag; + } + my $gene = $self->_getFeatureTag( $feat, 'gene' ); + if ($gene) { + return $gene; + } + my $product = $self->_getFeatureTag( $feat, 'product' ); + if ($product) { + return $product; + } + } + return sprintf("%s_%s_%s", $feat->start(), $feat->end(), ($feat->strand() == 1 ? 'sense':'antisense')); +} + + +sub requestCopy { + my ( $self, %data ) = @_; + use Bio::SeqIO; + if ($data{'file'} ) { + my ($guessed_type) = $filetype->detect( $data{'file'} ); + my $seqio = Bio::SeqIO->new( + -file => $data{'file'}, + -format => $guessed_type + ); + my @results; + while ( my $seqobj = $seqio->next_seq() ) { + return \$seqobj; + } + } + else { + die "No file specified"; + } +} + + +sub getSeqIO { + my ( $self, $file ) = @_; + use Bio::SeqIO; + if ($file ) { + my ($guessed_type) = $filetype->detect( $file ); + my $seqio = Bio::SeqIO->new( + -file => $file, + -format => $guessed_type + ); + return $seqio; + } + else { + die "No file specified"; + } +} + + +sub parseFile { + my ( $self, %data ) = @_; + use Bio::SeqIO; + + my ($guessed_type) = $filetype->detect( $data{'file'} ); + my $seqio = Bio::SeqIO->new( + -file => $data{'file'}, + -format => $guessed_type + ); + + # Are we to translate this + $self->var_translate(defined($data{translate}) && $data{translate}); + $self->var_header(defined($data{header}) && $data{header}); + + my @results; + if ( not defined $data{'subset'} ) { + $data{'subset'} = 'all'; + } + while ( my $seqobj = $seqio->next_seq() ) { + if ( + (ref $data{'subset'} ne 'ARRAY' + && $data{'subset'} eq 'whole' ) # Want the whole thing for a richseq + || + (ref $seqobj eq 'Bio::Seq' || ref $seqobj eq 'Bio::Seq::fasta') + # or it's a fasta type sequence + ) + { + push( @results, $self->handle_seq($seqobj)); + } + else #data subset eq sometag + { + my %wanted_tags; + if ( ref $data{'subset'} eq 'ARRAY' ) { + %wanted_tags = + map { $_ => 1 } @{ $data{'subset'} }; + } + else { + $wanted_tags{ $data{'subset'} }++; + } + foreach my $feat ( $seqobj->get_SeqFeatures ) { + if ( + $wanted_tags{ $feat->primary_tag } + || ( $wanted_tags{'all'} + && $feat->primary_tag ne + "source" ) + ) + { + push( @results, $self->handle_seq($feat)); + } + } + } + } + if ( $data{'callback'} ) { + $data{'callback'}->( \@results ); + } + else { + return \@results; + } +} + +sub handle_seq { + my ($self, $obj) = @_; + + my @line; + if ( $self->var_header() ){ + $line[0] = '>' . $self->_getIdentifier($obj); + } + + # Get our sequence + $line[1] = $self->intelligent_get_seq($obj); + + if ( $self->var_translate() ) { + $line[1] = $self->translate($line[1]); + } + return \@line; +} + +sub intelligent_get_seq { + my ($self, $obj, %extra) = @_; + # Top level, e.g., fasta/gbk file, "extra" doesn't apply to these + if ( ref $obj eq 'Bio::Seq::RichSeq' || ref $obj eq 'Bio::Seq' ) { + return $obj->seq; + }else{ + return $self->get_seq_from_feature($obj, %extra); + } +} +sub get_seq_from_feature { + my ($self, $feat, %extra) = @_; + my $seq; + my $l; + if($extra{parent}){ + $l = $extra{parent}->length(); + } + + if($extra{upstream}){ + if($feat->strand < 0){ + my $y = $feat->end + 1; + my $z = $feat->end + $extra{upstream}; + if($y < $l){ + if($z > $l){ + $z = $l; + } + $seq .= $extra{parent}->trunc($y, $z)->revcom->seq; + } + }else{ + my $y = $feat->start - $extra{upstream}; + my $z = $feat->start - 1; + if($z > 0){ + if($y < 1){ + $y = 1; + } + $seq .= $extra{parent}->trunc($y, $z)->seq; + } + } + } + if(ref($feat->location) eq 'Bio::Location::Simple'){ + $seq .= $feat->seq->seq(); + }else{ + $seq .= $feat->spliced_seq->seq(); + } + if($extra{downstream}){ + if($feat->strand < 0){ + my $y = $feat->start - $extra{downstream}; + my $z = $feat->start - 1; + if($z > 0){ + if($y < 1){ + $y = 1; + } + $seq .= $extra{parent}->trunc($y, $z)->revcom->seq; + } + }else{ + my $y = $feat->end + 1; + my $z = $feat->end + $extra{downstream}; + if($y < $l){ + if($z > $l){ + $z = $l; + } + $seq .= $extra{parent}->trunc($y, $z)->seq; + } + } + } + + return $seq; +} + + +sub translate { + my ($self, $seq) = @_; + if($seq =~ /^[ACTGN]+$/){ + my %ct = %{$self->codonTable}; + $seq = join( '' , map { if($ct{$_}){ $ct{$_} }else{ () } } unpack("(A3)*", $seq)); + } + return $seq; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio + +=head1 VERSION + +version 1.99.4 + +=head2 _getFeatureTag + + my $tag = $libCPT->_getFeatureTag($feature,'note'); + +returns all values of the given tag, joined with ','. + +=head2 requestCopy + + my $seqobj = $libCPT->requestCopy('file'=>'test.gbk'); + +requests a 'copy' of a given Bio::SeqIO file, which allows for addition of features before writing out to file. + +=head2 getSeqIO + + my $seqio = $libCPT->getSeqIO('file'=>'test.gbk'); + +requests a 'copy' of a given Bio::SeqIO file, which allows for addition of features before writing out to file. + +=head2 parseFile + + $libCPT->parseFile( + 'file' => $options{'file'}, + 'callback' => \&func, + 'translate' => 1, + 'header' => 1, + 'subset' => ['CDS', $options{'tag'}], + ); + +Arguably the most important function in this library, wraps a lot of functionality in a clean wrapper, since most of the scripts we have are written around data munging. + +=over 4 + +=item * + +file - the Bio::SeqIO file to process + +=item * + +callback - the function to send our data to. Done all at once, in an array + +=item * + +translate - should we translate the sequence to amino acids if it's not already. + +=item * + +subset - either "whole", a valid tag, or an array of valid tags + +=item * + +header - Do we want a header (FASTA) with our result set + +=back + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/DataSource.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/DataSource.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,49 @@ +package CPT::Bio::DataSource; +use Moose::Role; +use strict; +use warnings; +use autodie; + +requires 'getSeqIO'; + + +sub test { + +} + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::DataSource + +=head1 VERSION + +version 1.99.4 + +=head2 test + + $ds->test() + +empty method for now + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/DataSource/Chado.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/DataSource/Chado.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,125 @@ +package CPT::Bio::DataSource::Chado; +no warnings; +use Moose; +with 'CPT::Bio::DataSource'; + +has 'host' => ( is => 'rw', isa => 'Str' ); +has 'pass' => ( is => 'rw', isa => 'Str' ); +has 'user' => ( is => 'rw', isa => 'Str' ); +has 'name' => ( is => 'rw', isa => 'Str' ); +has 'port' => ( is => 'rw', isa => 'Str' ); + +has 'landmark' => ( is => 'rw', isa => 'Str' ); +has 'organism' => ( is => 'rw', isa => 'Str' ); + + +sub getSeqIO { + my ($self) = @_; + require CPT::Chado::GMOD_Conf; + + my $db = Bio::DB::Das::Chado->new( + -dsn => sprintf( 'dbi:Pg:dbname=%s;host=%s;port=%s', $self->name(), $self->host(), $self->port() ), + -user => $self->user(), + -pass => $self->pass(), + -organism => $self->organism(), + -inferCDS => 1, + + ); + + # Get a list of "segments". Essentially (seqlen IS NOT NULL) + my @segments = $db->segment( -name => $self->{'landmark'} ); + + # TODO: Need to have a fallback method + # Should only produce ONE since we specify landmark exactly + foreach my $segment (@segments) { + my $stream = $segment->get_feature_stream(); + use Bio::Seq; + my $seq_obj = Bio::Seq->new( + -seq => $segment->seq->seq(), + -display_id => $segment->id() + ); + use Bio::SeqFeature::Generic; + while ( my $feat = $stream->next_seq ) { + + # In an IDEAL world we'd just do $seq_obj->add_SeqFeature($feat); + # + # HOWEVER. + # + # ------------- EXCEPTION: Bio::Root::NotImplemented ------------- + # MSG: Abstract method "Bio::DB::Das::Chado::Segment::Feature::attach_seq" is not implemented by package Bio::DB::Das::Chado::Segment::Feature. + # This is not your fault - author of Bio::DB::Das::Chado::Segment::Feature should be blamed! + # STACK: Error::throw + # STACK: Bio::Root::Root::throw /usr/local/share/perl/5.14.2/Bio/Root/Root.pm:472 + # STACK: Bio::Root::RootI::throw_not_implemented /usr/local/share/perl/5.14.2/Bio/Root/RootI.pm:748 + # STACK: Bio::DB::Das::Chado::Segment::Feature::attach_seq /usr/local/share/perl/5.14.2/Bio/DB/Das/Chado/Segment/Feature.pm:374 + # STACK: Bio::Seq::add_SeqFeature /usr/local/share/perl/5.14.2/Bio/Seq.pm:1148 + # STACK: chado_export.pl:59 + # ---------------------------------------------------------------- + + # BUT WE CAN'T. >_> rageface.tiff + + my %keys; + foreach my $tag ( $feat->get_all_tags() ) { + my @values = $feat->get_tag_values($tag); + if ( $tag eq 'Note' ) { + $tag = 'note'; + } + if ( $tag eq 'Dbxref' ) { + $tag = 'db_xref'; + + #@values = map { if($_ ne 'GFF_source:Genbank'){ $_ } } @values; + @values = grep !/GFF_source:Genbank/, @values; + } + $keys{$tag} = \@values; + } + + #print $feat->gff_string(),"\n"; + my $new_feat = new Bio::SeqFeature::Generic( + -start => $feat->start(), + -end => $feat->end(), + -strand => $feat->strand(), + -primary_tag => $feat->primary_tag(), + -tag => \%keys, + ); + $seq_obj->add_SeqFeature($new_feat); + } + + return $seq_obj; + } + +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::DataSource::Chado + +=head1 VERSION + +version 1.99.4 + +=head2 getSeqIO + +supposed to get a seqIO object from a chado DB. not fully implemented + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/DataSource/GFF3.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/DataSource/GFF3.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,42 @@ +package CPT::Bio::DataSource::GFF3; +no warnings; +use Moose; +with 'CPT::Bio::DataSource'; + + +sub getSeqIO { + die 'unimplemented'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::DataSource::GFF3 + +=head1 VERSION + +version 1.99.4 + +=head2 getSeqIO + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/DataSource/GenBank.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/DataSource/GenBank.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,55 @@ +package CPT::Bio::DataSource::GenBank; +no warnings; +use Moose; +with 'CPT::Bio::DataSource'; + +has 'file' => ( is => 'rw', isa => 'Str' ); + + +sub getSeqIO { + my ($self) = @_; + use Bio::SeqIO; + my $seqio = Bio::SeqIO->new( + -file => $self->file(), + -format => 'GenBank', + ); + my @results; + return $seqio; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::DataSource::GenBank + +=head1 VERSION + +version 1.99.4 + +=head2 getSeqIO + + $gbk_ds->getSeqIO(); + +supposed to return a genabnk DS. Not tested. + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/Dbxref.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/Dbxref.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,78 @@ +package CPT::Bio::Dbxref; +use Moose; +use autodie; +use YAML; +use File::ShareDir; +use File::Spec qw/catfile/; + +has 'regex_map' => ( is => 'rw', isa => 'HashRef'); +has 'initialized' => ( is => 'rw', isa => 'Int', default => 0); + +sub init { + my ($self) = @_; + # Locate file + my $data_dir = File::ShareDir::dist_dir('libCPT'); + my $dbxref_data = File::Spec->catfile($data_dir, 'dbxref.yaml'); + # Parse + $self->regex_map(YAML::LoadFile($dbxref_data)); + $self->initialized(1); +} + +sub get_prefix { + my ($self, $dbxref) = @_; + if(!$self->initialized()){ + $self->init(); + } + + my @hits; + my %map = %{$self->regex_map()}; + # Search through regex database + foreach my $db(keys(%map)){ + if(defined($map{$db}{local_id_syntax})){ + my $ref = $map{$db}{local_id_syntax}; + if(ref($ref) eq 'ARRAY'){ + foreach my $regi(@{$ref}){ + if($dbxref =~ /$regi/){ + push(@hits, $map{$db}{abbreviation}); + } + } + } + if($dbxref =~ /$ref/){ + push(@hits, $map{$db}{abbreviation}); + } + } + } + return @hits; +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::Dbxref + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/GFF_Parsing.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/GFF_Parsing.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,89 @@ +package CPT::Bio::GFF_Parsing; +use Moose; +use autodie; + +my $tags = 'allele anticodon artificial_location +bio_material bound_moiety cell_line cell_type chromosome citation +clone clone_lib codon_start collected_by collection_date compare +country cultivar culture_collection db_xref dev_stage direction +EC_number ecotype environmental_sample estimated_length exception +experiment focus frequency function gap_type gene gene_synonym +germline haplogroup haplotype host identified_by inference isolate +isolation_source lab_host lat_lon linkage_evidence locus_tag +macronuclear map mating_type mobile_element_type mod_base mol_type +ncRNA_class note number old_locus_tag operon organelle organism +partial PCR_conditions PCR_primers phenotype plasmid pop_variant +product protein_id proviral pseudo rearranged replace +ribosomal_slippage rpt_family rpt_type rpt_unit_range rpt_unit_seq +satellite segment serotype serovar sex specimen_voucher +standard_name strain sub_clone sub_species sub_strain tag_peptide +tissue_lib tissue_type transgenic translation transl_except +transl_table trans_splicing variety'; +my %valid_tags = map { $_ => 1 } split( /\s+/, $tags ); + +my $keys = "-10_signal -35_signal 3'UTR 5'UTR +CAAT_signal CDS C_region D-loop D_segment GC_signal J_segment LTR +N_region RBS STS S_region TATA_signal V_region V_segment +assembly_gap attenuator enhancer exon gap gene iDNA intron mRNA +mat_peptide misc_RNA misc_binding misc_difference misc_feature +misc_recomb misc_signal misc_structure mobile_element +modified_base ncRNA old_sequence operon oriT polyA_signal +polyA_site precursor_RNA prim_transcript primer_bind promoter +protein_bind rRNA rep_origin repeat_region sig_peptide source +stem_loop tRNA terminator tmRNA transit_peptide unsure variation"; +my %valid_keys = map { $_ => 1 } split( /\s+/, $keys ); + +has 'tag_conv' => ( is => 'ro', isa => 'HashRef', default => sub { return { + #tags + 'dbxref' => 'db_xref', + 'parent' => 'note', + 'name' => 'label', + 'Name' => 'label', + 'old-name' => 'obsolete_name', + 'id' => 'note', + 'nat-host' => 'host', + 'genome' => 'note', + 'region' => 'source', +}; }); + + +sub fix_gff_tag { + my ($self, $tag) = @_; + # Lowercase it + if(defined $tag && defined(${$self->tag_conv()}{lc($tag)})){ + return ${$self->tag_conv()}{lc($tag)}; + } + return $tag; +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::GFF_Parsing + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/Lipo.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/Lipo.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,85 @@ +package CPT::Bio::Lipo; +use Moose; +use strict; +use warnings; +use Data::Dumper; +use autodie; + +# ABSTRACT: Lipo finding functionality in a library. + +sub run_hash { + my ( $self, $hash_ref ) = @_; + my %hash = %{$hash_ref}; + my @return_keys; + foreach ( sort { $a <=> $b } keys %hash ) { + if ( $hash{$_}[4] !~ qr/^.{10,39}C/ ) { + next; + } + else { + $hash{$_}[5] = $self->run_seq( $hash{$_}[4] ); + } + } + return \%hash; +} + +sub run_seq { + my ( $self, $seq ) = @_; + my @C; #A list of each C in the string, in str(10,40) + for ( my $j = 10 ; $j < length($seq) && $j < 40 ; $j++ ) { + if ( substr( $seq, $j, 1 ) =~ /[c]/i ) { + push( @C, $j ); + } + } + my @results; + for ( my $z = 0 ; $z < scalar(@C) ; $z++ ) { + my $upC10 = ""; + + #Make sure it's not ALL DEKRs 10 residues upstream. (Does that happen O.o) + if ( substr( $seq, $C[$z] - 10, 10 ) !~ /[DEKR]/ ) { + push( + @results, + [ + substr( $seq, 0, $C[$z] - 10 ), + substr( $seq, $C[$z] - 10, 10 ), + substr( $seq, $C[$z], 1 ), + substr( $seq, $C[$z] + 1 ), + ] + ); + } + } + if ( scalar @results ) { + return \@results; + } + return undef; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::Lipo - Lipo finding functionality in a library. + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/NW_MSA.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/NW_MSA.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,228 @@ +package CPT::Bio::NW_MSA; +use Moose; +use strict; +use warnings; +use autodie; +use List::Util qw(max); + +has 'sequences' => ( is => 'rw', isa => 'ArrayRef'); +# If true, relationships are assumed to be bidirectional +has 'bidi' => (is => 'rw', isa => 'Bool'); +has 'relationships' => ( is => 'rw', isa => 'HashRef', default => sub { {} }); +# +has 'verbose' => ( is => 'rw', isa => 'Num'); +# +has 'gap_penalty' => ( is => 'rw', isa => 'Num'); +has 'match_score' => ( is => 'rw', isa => 'Num'); +has 'mismatch_score' => ( is => 'rw', isa => 'Num'); + +# local stuff +#has 'current_list' => ( is => 'rw', isa => 'ArrayRef'); +has 'merger' => (is => 'rw', isa => 'HashRef'); +has 'number_of_aligned_lists' => ( is => 'rw', isa => 'Num', default => sub { 0 }); + +sub add_relationship { + my ($self, $from, $to) = @_; + ${$self->relationships()}{$from}{$to} = 1; + if($self->bidi()){ + ${$self->relationships()}{$to}{$from} = 1; + } +} + +sub Sij { + my($self, $merger_row, $query) = @_; + # Comparing a query against a merger row + my @check_against = @{$merger_row}; + #print "Checking " . join(",",@check_against) .":$query\t"; + foreach(@check_against){ + if(${$self->relationships()}{$query}{$_} + || ${$self->relationships()}{$_}{$query}){ + #print $self->match_score() . "\n"; + return $self->match_score(); + } + } + #print $self->mismatch_score() . "\n"; + return $self->mismatch_score(); +} + +sub align_list { + my ($self, $list_ref) = @_; + # If we haven't aligned any lists, we do something special + if($self->number_of_aligned_lists() == 0){ + # Pretend we've aligned ONE list already + $self->number_of_aligned_lists(1); + my @list = @{$list_ref}; + # Fake the merger + my %merger; + for(my $i = 0; $i < scalar @list; $i++){ + $merger{$i} = [$list[$i]]; + } + $self->merger(\%merger); + }else{ + $self->find_best_path($self->merger(), $list_ref); + $self->number_of_aligned_lists($self->number_of_aligned_lists() + 1); + } +} + +sub find_best_path { + my ($self, $merger_ref, $list_ref) = @_; + my %merger = %{$merger_ref}; + my @list = @{$list_ref}; + + my $max_i = scalar(keys(%merger)); + my $max_j = scalar(@list); + + my %score_mat; + my %point_mat; + + # Initial zeros for matrices + $point_mat{0}{0} = 'DONE'; + $score_mat{0}{0} = 0; + + for(my $a = 1; $a <= $max_i; $a++){ + $point_mat{$a}{0} = 'U'; + $score_mat{$a}{0} = $self->gap_penalty(); + } + for(my $b = 1; $b <= $max_j; $b++){ + $point_mat{0}{$b} = 'L'; + $score_mat{0}{$b} = $self->gap_penalty(); + } + + # Score + for(my $i = 1 ; $i <= $max_i; $i++){ + my $ci = $merger{$i-1}; + for(my $j = 1; $j <= $max_j; $j++){ + my $cj = $list[$j-1]; + # Scoring + my $diag_score = $score_mat{$i-1}{$j-1} + $self->Sij($ci,$cj); + my $up_score = $score_mat{$i-1}{$j} + $self->gap_penalty(); + my $left_score = $score_mat{$i}{$j-1} + $self->gap_penalty(); + + if($diag_score >= $up_score){ + if($diag_score >= $left_score){ + $score_mat{$i}{$j} = $diag_score; + $point_mat{$i}{$j} = 'D'; + }else{ + $score_mat{$i}{$j} = $left_score; + $point_mat{$i}{$j} = 'L'; + } + }else{ + if($up_score >= $left_score){ + $score_mat{$i}{$j} = $up_score; + $point_mat{$i}{$j} = 'U'; + }else{ + $score_mat{$i}{$j} = $left_score; + $point_mat{$i}{$j} = 'L'; + } + } + } + } + + $self->print2DArray('score_mat', \%score_mat); + $self->print2DArray('point_mat', \%point_mat); + + + # Calculate merger + my @new_row_set; + my $i = $max_i + 0; + my $j = $max_j + 0; + while($i != 0 || $j != 0){ + my $dir = $point_mat{$i}{$j}; + my @new_row; + if($dir eq 'D'){ + push(@new_row, @{$merger{$i-1}}, $list[$j-1]); + $i--; + $j--; + }elsif($dir eq 'L'){ + push(@new_row, split(//, '-' x ($self->number_of_aligned_lists()))); + push(@new_row, $list[$j-1]); + $j--; + }elsif($dir eq 'U'){ + push(@new_row, @{$merger{$i-1}}, '-'); + $i--; + } + if($self->verbose()){ + print join("\t", $i, $j, $dir, @new_row),"\n"; + } + push(@new_row_set,\@new_row); + } + + my %new_merger; + for(my $i = 0; $i < scalar(@new_row_set); $i++){ + $new_merger{$i} = $new_row_set[scalar(@new_row_set) - $i - 1]; + } + $self->merger(\%new_merger); +} + +sub merged_array{ + my ($self) = @_; + my %m = %{$self->merger()}; + my @result; + foreach(sort{$a<=>$b} keys(%m)){ + push(@result, $m{$_}); + } + return @result; +} + + +sub print2DArray{ + my($self,$name, $ref) = @_; + if($self->verbose && defined $ref){ + print '*' x 32,"\n"; + print $name,"\n"; + if(ref $ref eq 'ARRAY'){ + foreach(@{$ref}){ + print join("\t",@{$_}),"\n"; + } + }elsif(ref $ref eq 'HASH'){ + my %h = %{$ref}; + foreach my $a(sort(keys(%h))){ + if(ref $h{$a} eq 'ARRAY'){ + print join("", map { sprintf "%-4s",$_ } @{$h{$a}}); + }else{ + foreach my $b(sort(keys($h{$a}))){ + if(defined($h{$a}{$b})){ + printf "%5s", $h{$a}{$b}; + } + } + } + print "\n"; + } + }else{ + die 'Unsupported'; + } + print '*' x 32,"\n"; + } +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::NW_MSA + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/ORF.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/ORF.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,205 @@ +package CPT::Bio::ORF; +use strict; +use warnings; +use autodie; +use Moose; + +has min_gene_length => ( + is => 'rw', + isa => 'Int', + default => sub { + 0 + }, +); +has sc_atg => ( is => 'rw', isa => 'Bool', default => sub { 1 } ); +has sc_ttg => ( is => 'rw', isa => 'Bool', default => sub { 1 } ); +has sc_ctg => ( is => 'rw', isa => 'Bool', default => sub { 0 } ); +has sc_gtg => ( is => 'rw', isa => 'Bool', default => sub { 1 } ); + +our %code = ( + "TTT" => "F", "TTC" => "F", "TTA" => "L", "TTG" => "L", "TCT" => "S", + "TCC" => "S", "TCA" => "S", "TCG" => "S", "TAT" => "Y", "TAC" => "Y", + "TAA" => "*", "TAG" => "*", "TGT" => "C", "TGC" => "C", "TGA" => "*", + "TGG" => "W", "CTT" => "L", "CTC" => "L", "CTA" => "L", "CTG" => "L", + "CCT" => "P", "CCC" => "P", "CCA" => "P", "CCG" => "P", "CAT" => "H", + "CAC" => "H", "CAA" => "Q", "CAG" => "Q", "CGT" => "R", "CGC" => "R", + "CGA" => "R", "CGG" => "R", "ATT" => "I", "ATC" => "I", "ATA" => "I", + "ATG" => "M", "ACT" => "T", "ACC" => "T", "ACA" => "T", "ACG" => "T", + "AAT" => "N", "AAC" => "N", "AAA" => "K", "AAG" => "K", "AGT" => "S", + "AGC" => "S", "AGA" => "R", "AGG" => "R", "GTT" => "V", "GTC" => "V", + "GTA" => "V", "GTG" => "V", "GCT" => "A", "GCC" => "A", "GCA" => "A", + "GCG" => "A", "GAT" => "D", "GAC" => "D", "GAA" => "E", "GAG" => "E", + "GGT" => "G", "GGC" => "G", "GGA" => "G", "GGG" => "G", +); + + + +sub run { + my ($self, $sequence) = @_; + # Read through forward strand + my @putative_starts; + + # 30 seconds with a bioperl object + # 5 seconds with string munging. >:| + my $dna = uc( $sequence ); + my $length = length($sequence); + + # Pre-create the regular expressions + my ( $regex_forward, $regex_backwards ); + my $not_statement_f = '^'; + my $not_statement_r = '^'; + if ( !$self->sc_atg() ) { + $not_statement_f .= 'A'; + $not_statement_r .= 'T'; + } + if ( !$self->sc_ctg() ) { + $not_statement_f .= 'C'; + $not_statement_r .= 'G'; + } + if ( !$self->sc_ttg() ) { + $not_statement_f .= 'T'; + $not_statement_r .= 'A'; + } + if ( !$self->sc_gtg() ) { + $not_statement_f .= 'G'; + $not_statement_r .= 'C'; + } + + # If any start is acceptable, we re-add them and remove our ^ + if($not_statement_r eq '^' && $not_statement_f eq '^'){ + $not_statement_f = 'ACTG'; + $not_statement_r = 'ACTG'; + } + $regex_forward = qr/[${not_statement_f}]TG/; + $regex_backwards = qr/CA[${not_statement_r}]/; + + # Collect putative starts + for ( my $i = 1 ; $i < $length - 1 ; $i++ ) { + my $tri_nt = substr( $dna, $i - 1, 3 ); #$seq_obj->subseq($i,$i+2); + if ( $tri_nt =~ $regex_forward ) { + push( @putative_starts, [ $i, '+' ] ); + } + if ( $tri_nt =~ $regex_backwards ) { + push( @putative_starts, [ $i + 2, '-' ] ); + } + } + my %ORFs; + + #Loop through all of the starts we have + my $fc = 0; + my $rc = 0; + foreach (@putative_starts) { + my @putative_start = @{$_}; + + my $final_seq = ""; + + my $add; + my $tri_nt; + if ( $putative_start[1] eq "+" ) { + my $end; + for ( my $k = $putative_start[0] ; $k < $length ; $k = $k + 3 ) + { + my $tri_nt = substr( $dna, $k, 3 ); + my $aa = $code{$tri_nt}; + if ( $aa && $aa ne '*' ) { + $end = $k + 3; + $final_seq .= $tri_nt; + } + else { + last; + } + } + if ( length($final_seq)/3 > $self->min_gene_length() ) { + $ORFs{ 'f_' + $fc++ } = [ + length($final_seq)/3, + $putative_start[0], + $end, + 'F', + $final_seq + ]; + } + } # - strand + else { + my $end; + for ( my $k = $putative_start[0] ; $k >= 2 ; $k = $k - 3 ) { + my $tmp = reverse( substr( $dna, $k - 3, 3 ) ); + + $tmp =~ tr/ACTG/qzAC/; + $tmp =~ tr/qz/TG/; + + my $aa = $code{$tmp}; + if ( defined $aa && $aa ne '*' ) { + $end = $k - 1; + $final_seq .= $tmp; + } + else { + last; + } + + } + if ( length($final_seq)/3 > $self->min_gene_length() ) { + $ORFs{ 'r_' + $rc++ } = [ + length($final_seq)/3, + $end, + $putative_start[0], + 'R', + $final_seq + ]; + } + } + } + + my @orfs; + + for my $orf_key ( sort( keys(%ORFs) ) ) { + my @tmp= @{ $ORFs{$orf_key} }; + my $seqobj = Bio::Seq->new( + -display_id => sprintf( + 'orf%05d_%s', + ($orf_key + 1), $tmp[3], + ), + -desc => sprintf( + '[%s-%s; %s aa long]' + ,$tmp[1], $tmp[2], $tmp[0] + ), + -seq => $tmp[4] + ); + push(@orfs, $seqobj); + } + return @orfs; +} + + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::ORF + +=head1 VERSION + +version 1.99.4 + +=function run + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/RBS.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/RBS.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,59 @@ +package CPT::Bio::RBS; +use Moose; +use autodie; + +has 'algo' => ( is => 'rw', isa => 'Str' ); +has 'predictor' => ( is => 'rw', isa => 'Any' ); +has 'only_best' => ( is => 'rw', isa => 'Bool', default => sub { 0 } ); + +sub set_algorithm { + my ( $self, $algorithm ) = @_; + if ( $algorithm eq 'naive' ) { + use CPT::Bio::RBS::Algo::Naive; + my $a = CPT::Bio::RBS::Algo::Naive->new(); + $self->predictor($a); + } + else { + die 'Algorithm not implemented'; + } +} + +# Run the prediction on the sequence +sub predict { + my ( $self, $sequence ) = @_; + return $self->predictor()->predict( + sequence => lc($sequence), + return_best => $self->only_best(), + ); +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::RBS + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/RBS/Algo.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/RBS/Algo.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,39 @@ +package CPT::Bio::RBS::Algo; +use Moose::Role; +use strict; +use warnings; +use autodie; + +# Function to predict RBSs in a given sequence +requires 'predict'; + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::RBS::Algo + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/RBS/Algo/Naive.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/RBS/Algo/Naive.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,103 @@ +package CPT::Bio::RBS::Algo::Naive; +use Moose; +with 'CPT::Bio::RBS::Algo'; +use CPT::Bio::RBS_Object; + +my @SDs = ( + 'aggaggt', + 'ggaggt', + 'aggagg', + 'aggag', + 'gaggt', + 'ggagg', + 'aggt', + 'gggt', + 'gagg', + 'gggg', + 'agga', + 'ggag', + 'gga', + 'gag', + 'agg', + 'ggt', +); + +sub predict { + my ( $self, %params ) = @_; + my $upstream = $params{sequence}; + my $only_best = $params{return_best}; + + my $length = length($upstream); + + my @results = (); + foreach my $rbs ( @SDs ){ + while ($upstream =~ /$rbs/g) { + # Position of regex match + my $loc = $-[0]; + # Seq before RBS + my $before = substr($upstream,0, $loc); + # Seq after RBS + my $after = substr($upstream, $loc + length($rbs)); + my $rbs_o = CPT::Bio::RBS_Object->new( + upstream => sprintf('%s %s %s', $before , uc($rbs) , $after), + score => $self->score_match($rbs, length($after)), + rbs_seq => uc($rbs), + separation => length($after), + ); + push( @results, $rbs_o ); + } + } + @results = sort { $b->score() <=> $a->score() } @results; + if (@results) { + if($only_best){ + return ($results[0]); + }else{ + return @results; + } + } + else { + return ( + CPT::Bio::RBS_Object->new( + upstream =>$upstream, + score => '-1', + rbs_seq => 'None', + separation => -1, + ) + ); + } +} + +sub score_match { + my ($self, $match, $dist) = @_; + return length($match); +} + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::RBS::Algo::Naive + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/RBS_Object.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/RBS_Object.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,39 @@ +package CPT::Bio::RBS_Object; +use Moose; +use autodie; + +has 'upstream' => ( is => 'rw', isa => 'Str' ); +has 'score' => (is => 'rw', isa => 'Int'); +has 'rbs_seq' => (is => 'rw', isa => 'Str'); +has 'separation' => (is => 'rw', isa => 'Int'); + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::RBS_Object + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Bio/SAR.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Bio/SAR.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,76 @@ +package CPT::Bio::SAR; +use strict; +use warnings; +use autodie; +use Moose; + +sub filter_sar { + my ($self, @seqs) = @_; + my @good; + foreach(@seqs){ + if(has_sar_motif($_)){ + push(@good, $_); + } + } + return @good; +} + +sub has_sar_motif { + my ( $self, $seq ) = @_; + + return 0 if(length $seq < 40); + + my $reg_a = qr/([^DEKR]{3}K[^DEKR]{8,}[^DER]{1}[^DEKR]{3})/; + my $reg_b = qr/([KR]{1,}[^DEKR]{12,}[^DER]{1}[^DEKR]{3})/; + + my $first40 = substr( $seq, 0, 40 ); + + # there is a transmembrane domain in the first 40 AAs + # there is at least one positive charged AAs in front of the TMD + if ( $first40 =~ $reg_a || $first40 =~ $reg_b ) { + my $modi1st40 = $first40; + # Cut out the match, and then add the whole thing to the end. + my $t4homology = + #substr($seq,0, $-[0] ), # Before the match + #substr($seq, $-[0], ($+[0] - $-[0])), # the match + substr($seq, $+[0]). # After the match + $first40; + $t4homology = substr($t4homology, 0 , 40); + + if ( $t4homology =~ qr/E[A-Z]{8}[DC][A-Z]{4,5}T/ ) { + return 1; + } + } + return 0; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Bio::SAR + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/BioData.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/BioData.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,246 @@ +package CPT::BioData; +use Moose; +use strict; +use warnings; +use autodie; + +has 'dummy_var' => (isa => 'Str', is => 'ro'); + +my %genbank_feature_tags = ( + "locus_tag" => 1, + "gene" => 1, + "product" => 1, + "allele" => 1, + "anticodon" => 1, + "artificial_location" => 1, + "bio_material" => 1, + "bound_moiety" => 1, + "cell_line" => 1, + "cell_type" => 1, + "chromosome" => 1, + "citation" => 1, + "clone" => 1, + "clone_lib" => 1, + "codon_start" => 1, + "collected_by" => 1, + "collection_date" => 1, + "compare" => 1, + "country" => 1, + "cultivar" => 1, + "culture_collection" => 1, + "db_xref" => 1, + "dev_stage" => 1, + "direction" => 1, + "EC_number" => 1, + "ecotype" => 1, + "environmental_sample" => 1, + "estimated_length" => 1, + "exception" => 1, + "experiment" => 1, + "focus" => 1, + "frequency" => 1, + "function" => 1, + "gap_type" => 1, + "gene_synonym" => 1, + "germline" => 1, + "haplogroup" => 1, + "haplotype" => 1, + "host" => 1, + "identified_by" => 1, + "inference" => 1, + "isolate" => 1, + "isolation_source" => 1, + "lab_host" => 1, + "lat_lon" => 1, + "linkage_evidence" => 1, + "macronuclear" => 1, + "map" => 1, + "mating_type" => 1, + "mobile_element_type" => 1, + "mod_base" => 1, + "mol_type" => 1, + "ncRNA_class" => 1, + "note" => 1, + "number" => 1, + "old_locus_tag" => 1, + "operon" => 1, + "organelle" => 1, + "organism" => 1, + "partial" => 1, + "PCR_conditions" => 1, + "PCR_primers" => 1, + "phenotype" => 1, + "plasmid" => 1, + "pop_variant" => 1, + "protein_id" => 1, + "proviral" => 1, + "pseudo" => 1, + "rearranged" => 1, + "replace" => 1, + "ribosomal_slippage" => 1, + "rpt_family" => 1, + "rpt_type" => 1, + "rpt_unit_range" => 1, + "rpt_unit_seq" => 1, + "satellite" => 1, + "segment" => 1, + "serotype" => 1, + "serovar" => 1, + "sex" => 1, + "specimen_voucher" => 1, + "standard_name" => 1, + "strain" => 1, + "sub_clone" => 1, + "sub_species" => 1, + "sub_strain" => 1, + "tag_peptide" => 1, + "tissue_lib" => 1, + "tissue_type" => 1, + "transgenic" => 1, + "translation" => 1, + "transl_except" => 1, + "transl_table" => 1, + "trans_splicing" => 1, + "variety" => 1, +); +my %artemis_colours = ( + 0 => 'rgb(255,255,255)', + 1 => 'rgb(100,100,100)', + 2 => 'rgb(255,0,0)', + 3 => 'rgb(0,255,0)', + 4 => 'rgb(0,0,255)', + 5 => 'rgb(0,255,255)', + 6 => 'rgb(255,0,255)', + 7 => 'rgb(255,255,0)', + 8 => 'rgb(152,251,152)', + 9 => 'rgb(135,206,250)', + 10 => 'rgb(255,165,0)', + 11 => 'rgb(200,150,100)', + 12 => 'rgb(255,200,200)', + 13 => 'rgb(170,170,170)', + 14 => 'rgb(0,0,0)', + 15 => 'rgb(255,63,63)', + 16 => 'rgb(255,127,127)', + 17 => 'rgb(255,191,191)', +); + +sub artemis_colour_decode{ + my ($self, $idx) = @_; + return $artemis_colours{$idx}; +} + +my %table321 = ( + 'Gly' => 'G', 'Pro' => 'P', + 'Ala' => 'A', + 'Val' => 'V', + 'Leu' => 'L', + 'Ile' => 'I', + 'Met' => 'M', + 'Cys' => 'C', + 'Phe' => 'F', + 'Tyr' => 'Y', + 'Trp' => 'W', + 'His' => 'H', + 'Lys' => 'K', + 'Arg' => 'R', + 'Gln' => 'Q', + 'Asn' => 'N', + 'Glu' => 'E', + 'Asp' => 'D', + 'Ser' => 'S', + 'Thr' => 'T', + 'XXX' => 'X', + 'End' => '*', + 'Stop' => '*' +); + +sub decode321{ + my ($self, $three) = @_; + return $table321{$three}; +} + + +sub get321Table { + my ($self) = @_; + return \%table321; +} + + +sub getTranslationTable { + my ($self, $table_id) = @_; + require Bio::Tools::CodonTable; + my $table = Bio::Tools::CodonTable->new( -id => (defined $table_id? $table_id: 1) ); + my %result; + my @codons = qw(A C T G); + foreach my $i (@codons) { + foreach my $j (@codons) { + foreach my $k (@codons) { + $result{"$i$j$k"} = $table->translate("$i$j$k"); + } + } + } + if(defined($table_id) && $table_id == 11){ + $result{TGA} = '*'; + $result{TAA} = '#'; + $result{TAG} = '+'; + } + return \%result; +} + + + +sub isValidTag { + my ( $self, $tag ) = @_; + return $genbank_feature_tags{$tag}; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::BioData + +=head1 VERSION + +version 1.99.4 + +=head2 get321Table + + $bio->get321Table(); + +Convenience function which returns a codon translation table (3 letter ID to 1 letter code) + +=head2 getTranslationTable + + $bio->getTranslationTable(); + +Convenience function which returns a hash translated according to Bio::Tools::CodonTable + +This is done for speed reasons. CodonTable is very slow and we require better performance + +=head2 isValidTag + + if($cptbio->isValidTag('locus_tag')) { ... } + +Will validate a GBK feature tag + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/CLI.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/CLI.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,45 @@ +package CPT::CLI; +use strict; +use warnings; + +our $VERSION = '1.00'; + +=head1 NAME + +CPT::CLI - a stub library to make Dist::Zilla happy + +=head1 SYNOPSIS + + use CPT::CLI; + +=head1 FUNCTIONAL INTERFACE + +=head2 new + + my $libCPT = CPT::CLI->new(); + +=cut + +sub new { + my ( $class, %options ) = @_; + my $self = bless( {}, $class ); + return $self; +} + +1; +__END__ + +=head1 AUTHOR + +Eric Rasche, rasche.eric@yandex.ru + +=head1 COPYRIGHT AND LICENSE + +Copyright (C) 2013 by Eric Rasche + +This library is free software; you can redistribute it and/or modify +it under the same terms as Perl itself, either Perl version 5.14.2 or, +at your option, any later version of Perl 5 you may have available. + + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Chado/GMOD_Conf.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Chado/GMOD_Conf.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,75 @@ +package CPT::Chado::GMOD_Conf; +use Moose; + +has 'database_identifier' => ( is => 'rw', isa => 'Str' ); +has 'gmod_root' => ( is => 'rw', isa => 'Str', default => '/usr/share/gmod/' ); +has 'database' => ( is => 'rw', isa => 'Str' ); +has 'username' => ( is => 'rw', isa => 'Str' ); +has 'password' => ( is => 'rw', isa => 'Str' ); +has 'host' => ( is => 'rw', isa => 'Str', default => 'localhost' ); +has 'port' => ( is => 'rw', isa => 'Int', default => 5432 ); + +sub load_config { + my ( $self, $identifier ) = @_; + open( my $db_info, '<', + $self->gmod_root() . "conf/${identifier}.conf" ); + while (<$db_info>) { + chomp $_; + if ( $_ =~ /DBUSER=/ ) { + $self->username( substr( $_, 7 ) ); + } + elsif ( $_ =~ /DBPASS=/ ) { + $self->password( substr( $_, 7 ) ); + } + elsif ( $_ =~ /DBNAME=/ ) { + $self->database( substr( $_, 7 ) ); + } + } + close($db_info); +} + +sub get_connector { + my ( $self, $identifier ) = @_; + $self->load_config($identifier); + use DBI; + my $dbh = DBI->connect( + sprintf( + "dbi:Pg:dbname=%s;host=%s;port=%s;", + $self->database(), $self->host(), $self->port() + ), + $self->username(), + $self->password() + ); + return $dbh; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Chado::GMOD_Conf + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Circos/Conf.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Circos/Conf.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,90 @@ +package CPT::Circos::Conf; +use strict; +use warnings; +use autodie; +use Moose; + +# This parameter specifies that we should behave according to galaxy_override spec. +has data => ( + is => 'rw', + isa => 'ArrayRef', + default => sub { + [] + }, +); +my @current_block = (); + +sub set { + my ($self, $key, $value) = @_; + $self->push_d(sprintf('%s = %s', $key, $value)); +} + +sub start_block { + my ($self, $block) = @_; + $self->push_d(sprintf('<%s>', $block)); + push(@current_block, $block); +} + +sub end_block { + my ($self) = @_; + $self->push_d(sprintf('', pop @current_block)); +} + +sub include { + my ($self, $file) = @_; + $self->push_d(sprintf('<>', $file)); +} + +sub push_d { + my ($self, $string) = @_; + push(@{$self->data()}, $self->spaces_for_indent() . $string); +} + +sub spaces_for_indent { + my ($self) = @_; + if(scalar(@current_block) > 0){ + return ' ' x scalar(@current_block); + } + return ''; +} + +sub finalize { + my ($self) = @_; + if(scalar @current_block > 0){ + die 'Blocks [' . join(',', @current_block) . '] were not closed'; + } + else{ + return join("\n", @{$self->data()}); + } +} + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Circos::Conf + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/External.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/External.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,39 @@ +package CPT::External; +use Moose::Role; +use strict; +use warnings; +use autodie; + +requires 'analyze'; +requires 'cleanup'; + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::External + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/External/LipoP.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/External/LipoP.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,92 @@ +package CPT::External::LipoP; +no warnings; +use Moose; +use File::Temp qw(tempfile); +use IPC::Run qw(run); + +has 'sequence' => ( is => 'rw', isa => 'Str' ); +has 'response' => ( is => 'rw', isa => 'Str' ); +has 'cleavage' => ( is => 'rw', isa => 'ArrayRef' ); +has 'tmpfile' => (is => 'rw', isa => 'Any'); + +sub create_fasta_file { + my ( $self, $seq ) = @_; + my ( $fh, $filename ) = tempfile( "cpt.lipop.XXXXXXX", UNLINK => 1 ); + + #printf $fh ">%s\n%s\n", 'seq', $seq; + printf $fh "%s\n", $seq; + close($fh); + return $filename; +} + +sub parse_text { + my ($self) = @_; + + foreach my $line ( split( /\n/, $self->response() ) ) { + if ( $line =~ /#.*cleavage=(\d+)-(\d+)/ ) { + $self->cleavage( [ $1, $2 ] ); + } + } + # # seq SpII score=17.8897 margin=13.48964 cleavage=19-20 Pos+2=K + # # Cut-off=-3 + # seq LipoP1.0:Best SpII 1 1 17.8897 + # seq LipoP1.0:Margin SpII 1 1 13.48964 + # seq LipoP1.0:Class SpI 1 1 4.40006 + # seq LipoP1.0:Class CYT 1 1 -0.200913 + # seq LipoP1.0:Signal CleavII 19 20 17.8897 # LVVSA|CKSPP Pos+2=K + # seq LipoP1.0:Signal CleavI 28 29 4.03409 # PPVQS|QRPEP + # seq LipoP1.0:Signal CleavI 21 22 2.04071 # VSACK|SPPPV + # seq LipoP1.0:Signal CleavI 27 28 -2.07502 # PPPVQ|SQRPE + # seq LipoP1.0:Signal CleavI 19 20 -2.6907 # LVVSA|CKSPP + # seq LipoP1.0:Signal CleavI 22 23 -2.87659 # SACKS|PPPVQ +} + +sub analyze { + my ( $self, $seq ) = @_; + $self->sequence($seq); + $self->tmpfile($self->create_fasta_file($seq)); + my @cmd = ( 'LipoP', $self->tmpfile(), '-workdir', '/tmp/nonexistant/', '-wwwdir', '/tmp/nonexistant' ); + my ( $in, $out, $err ); + run \@cmd, \$in, \$out, \$err; + if ($err) { print "Error: $err\n"; } + $self->response($out); + $self->parse_text(); + unlink($self->tmpfile()); +} + +sub cleanup { + my ($self) = @_; + return; +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::External::LipoP + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/External/TMHMM.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/External/TMHMM.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,164 @@ +package CPT::External::TMHMM; +no warnings; +use Moose; +use File::Temp qw(tempfile); +use IPC::Run qw(run); +use File::Temp qw(tempdir); +use File::Copy qw(move); + +has 'sequence' => ( is => 'rw', isa => 'Str' ); +has 'response' => ( is => 'rw', isa => 'Str' ); + +has 'num_predicted' => ( is => 'rw', isa => 'Int' ); +has 'predicted_locations' => ( is => 'rw', isa => 'ArrayRef' ); +has 'prob_n_in' => ( is => 'rw', isa => 'Str' ); +has 'hash' => ( is => 'rw', isa => 'Str' ); +has 'picture_location' => ( is => 'rw', isa => 'Str' ); + +my ( $fh, $filename, $image ); + +sub create_fasta_file { + my ( $self, $seq ) = @_; + ( $fh, $filename ) = tempfile( "cpt.tmhmm.XXXXXXX", UNLINK => 1 ); + + #printf $fh ">%s\n%s\n", 'seq', $seq; + printf $fh "%s\n", $seq; + return $filename; +} + +sub parse_text { + my ($self) = @_; + unless ( $self->predicted_locations() ) { + $self->predicted_locations( [] ); + } + unless ( $self->num_predicted() ) { + $self->num_predicted(0); + } + foreach my $line ( split( /\n/, $self->response() ) ) { + if ( $line =~ /Number of predicted TMHs:\s*(\d+)/ ) { + $self->num_predicted($1); + } + elsif ( $line =~ /Total prob of N-in:\s*([0-9.]*)/ ) { + $self->prob_n_in($1); + } + elsif ( $line =~ /TMHMM2.0\s*TMhelix\s*([0-9]+)\s*([0-9]+)/ ) { + + #$self->num_predicted($self->num_predicted()+1); + push( @{ $self->predicted_locations() }, [ $1, $2 ] ); + } + } + + # # seq Length: 145 + # # seq Number of predicted TMHs: 1 + # # seq Exp number of AAs in TMHs: 20.91302 + # # seq Exp number, first 60 AAs: 20.91265 + # # seq Total prob of N-in: 0.04659 + # # seq POSSIBLE N-term signal sequence + # seq TMHMM2.0 outside 1 3 + # seq TMHMM2.0 TMhelix 4 23 + # seq TMHMM2.0 inside 24 145 +} + +sub analyze { + my ( $self, $seq ) = @_; + + # Set our hash + require Digest::MD5; + $self->hash( Digest::MD5::md5_hex($seq) ); + + # Set our sequence + $self->sequence($seq); + + # Tmp dir to run in + my $dir = tempdir( CLEANUP => 1 ); + my $tmpfile = $self->create_fasta_file($seq); + + # Plot and use specified workdir + my @cmd = ( 'tmhmm.pl', '-plot', '-workdir', $dir, '<', $tmpfile ); + + # Run the command + my ( $in, $out, $err ); + run \@cmd, \$in, \$out, \$err; + + # If error,we error + if ($err) { + + # print STDERR "Error: $err\n"; + # Kinda a crappy way to handle this... + $self->response($err); + return 0; + } + { + + # Move the created plot to a known location + my @tmhmm_files = glob("$dir/*/*.png"); + + # There's really only one png but this is just as easy to write + for my $png (@tmhmm_files) { + $image = sprintf( "/tmp/cpt.ext.tmhmm.%s.png", $self->hash() ); + move( $png, $image ); + } + + # Module to remove the temporary dir so we clean up after + # ourselves quickly, since this programme seems to open a LARGE + # number of file handles. + require File::Path; + File::Path::remove_tree($dir); + + # set response and parse it, then return OK. + $self->response($out); + $self->parse_text(); + return 1; + } +} + +sub cleanup { + if ( defined($fh) ) { + close($fh); + unlink($filename); + if ( -e $image ) { + unlink($image); + } + } +} + +END { + if ( defined($fh) ) { + close($fh); + unlink($filename); + if ( -e $image ) { + unlink($image); + } + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::External::TMHMM + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Filetype.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Filetype.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,51 @@ +package CPT::Filetype; +use Moose::Role; +use strict; +use warnings; +use autodie; + +has lines => ( is => 'rw', isa => 'ArrayRef'); +# Also have file location if we need to open a filehandle on it to further +# check. +has file => ( is => 'rw', isa => 'Str'); + + +requires 'score'; +requires 'name'; + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Filetype + +=head1 VERSION + +version 1.99.4 + +=head1 score + +Score should be a method that returns a number between 0 and 1 describing the +probability that it is this format, with 1 indicating that it truly is this +file to the exclusion of every other type. + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Filetype/embl.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Filetype/embl.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,54 @@ +package CPT::Filetype::embl; +no warnings; +use Moose; +with 'CPT::Filetype'; + +sub score { + my ($self) = @_; + my $first_line = ${$self->lines()}[0]; + my @embl_identifiers = ( + 'FT', 'FH', 'SQ', 'DE', 'AC', 'PA', 'SV', 'DT', 'KW', + 'OS', 'OC', 'OX', 'R ', '' , 'DR', 'CC', 'CO', 'XX', + ); + my %embl_id_map = map { $_ => 1 } @embl_identifiers; + my $embl_score = 0; + foreach(@{$self->lines()}){ + $embl_score++ if($embl_id_map{substr($_,0,2)}); + } + return $embl_score/10; +} + +sub name { + return 'embl'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Filetype::embl + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Filetype/fasta.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Filetype/fasta.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,51 @@ +package CPT::Filetype::fasta; +no warnings; +use Moose; +with 'CPT::Filetype'; + +sub score { + my ($self) = @_; + my @l = @{$self->lines()}; + + for(my $i = 0; $i < scalar(@l) - 1; $i++){ + if(substr($l[$i],0,1) eq '>' && $l[$i+1] =~ /[A-Za-z]/){ + return 1; + } + } + return 0; +} + +sub name { + return 'fasta'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Filetype::fasta + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Filetype/gbk.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Filetype/gbk.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,46 @@ +package CPT::Filetype::gbk; +no warnings; +use Moose; +with 'CPT::Filetype'; + +sub score { + my ($self) = @_; + my $first_line = ${$self->lines()}[0]; + return $first_line =~ '^LOCUS'; +} + +sub name { + return 'genbank'; +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Filetype::gbk + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Filetype/gff3.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Filetype/gff3.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,45 @@ +package CPT::Filetype::gff3; +no warnings; +use Moose; +with 'CPT::Filetype'; + +sub score { + my ($self) = @_; + my $first_line = ${$self->lines()}[0]; + return $first_line =~ '##gff-version 3'; +} + +sub name { + return 'gff'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Filetype::gff3 + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/FiletypeDetector.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/FiletypeDetector.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,117 @@ +package CPT::FiletypeDetector; +use Moose; +use strict; +use warnings; +use Data::Dumper; +use autodie; + +# ABSTRACT: an incredibly basic filetype detection library for genomic data + + +sub head { + my ($self, $filename) = @_; + # We're only going to focus on detecting a few types + open( my $file, '<', $filename ); + my @lines; + my $c = 0; + while (<$file>) { + # Read ten lines + if ( $c++ < 10 ) { + chomp $_; + push( @lines, $_ ); + } + # Then exit + else { + last; + } + } + close($file); + return @lines; +} + +sub detect { + my ( $self, $filename ) = @_; + + my @lines = $self->head($filename); + + use CPT::Filetype::gff3; + use CPT::Filetype::gbk; + use CPT::Filetype::embl; + use CPT::Filetype::fasta; + + my @scorers = ( + CPT::Filetype::gff3->new(lines => \@lines, file => $filename), + CPT::Filetype::gbk->new(lines => \@lines, file => $filename), + CPT::Filetype::embl->new(lines => \@lines, file => $filename), + CPT::Filetype::fasta->new(lines => \@lines, file => $filename), + ); + + my $best_score = 0; + my $best_name = ""; + foreach(@scorers){ + my $score = $_->score(); + # "1 indicating ... to the exclusion [of others] + if($score == 1){ + return $_->name(); + } + + # Otherwise check if better + if($score > $best_score){ + $best_name = $_->name(); + } + } + + return $best_name; + + # if(defined $string){ + # return 'fasta' if( $string =~ /\.(fasta|fast|seq|fa|fsa|nt|aa)$/i); + # return 'genbank' if( $string =~ /\.(gb|gbank|genbank|gbk)$/i); + # return 'scf' if( $string =~ /\.scf$/i); + # return 'pir' if( $string =~ /\.pir$/i); + # return 'embl' if( $string =~ /\.(embl|ebl|emb|dat)$/i); + # return 'raw' if( $string =~ /\.(txt)$/i); + # return 'gcg' if( $string =~ /\.gcg$/i); + # return 'ace' if( $string =~ /\.ace$/i); + # return 'bsml' if( $string =~ /\.(bsm|bsml)$/i); + # return 'swiss' if( $string =~ /\.(swiss|sp)$/i); + # return 'phd' if( $string =~ /\.(phd|phred)$/i); + # return 'gff' if( $string =~ /\.(gff|gff3)$/i); + # return 'blastxml' if( $string =~ /\.(xml)$/i); + # die "File type detection failure"; + # } + # else{ + # die "File type detection failure"; + # } + +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::FiletypeDetector - an incredibly basic filetype detection library for genomic data + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/GBK2GFF3.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/GBK2GFF3.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,376 @@ +package CPT::GBK2GFF3; +use strict; +use warnings; +use CPT; +use Data::Dumper; +use autodie; +use Bio::SeqIO; +use Moose; + +has data => ( + is => 'rw', + isa => 'ArrayRef', + default => sub { [] }, +); +has header => ( + is => 'rw', + isa => 'ArrayRef', +); +has sequence => ( + is => 'rw', + isa => 'Str', +); +has seqid => ( + is => 'rw', + isa => 'Str', +); +has genbank => ( + is => 'rw', + isa => 'Str', +); +has is_circular => ( + is => 'rw', + isa => 'Str', +); +has id_prefix => ( is => 'rw', isa => 'Str' ); +has global_feat_idx => ( is => 'rw', isa => 'Int', default => sub { 0 } ); +has source => ( is => 'rw', isa => 'ArrayRef'); + +my %feat_type_count; +my %reserved_keys = map { $_ => 1 } + qw/ID Name Alias Parent Target Gap Derives_from Note Dbxref Ontology_term/; + +use CPT::Bio; +my $bio = CPT::Bio->new(); +my %key_mapping = ( + "-" => [ "located_sequence_feature", "SO:0000110" ], + "-10_signal" => [ "minus_10_signal", "SO:0000175" ], + "-35_signal" => [ "minus_35_signal", "SO:0000176" ], + "3'UTR" => [ "three_prime_UTR", "SO:0000205" ], + "3'clip" => [ "three_prime_clip", "SO:0000557" ], + "5'UTR" => [ "five_prime_UTR", "SO:0000204" ], + "5'clip" => [ "five_prime_clip", "SO:0000555" ], + "CAAT_signal" => [ "CAAT_signal", "SO:0000172" ], + "CDS" => [ "CDS", "SO:0000316" ], + "D-loop" => [ "D_loop", "SO:0000297" ], + "D_segment" => [ "D_gene", "SO:0000458" ], + "GC_signal" => [ "GC_rich_region", "SO:0000173" ], + "LTR" => [ "long_terminal_repeat", "SO:0000286" ], + "RBS" => [ "ribosome_entry_site", "SO:0000139" ], + "STS" => [ "STS", "SO:0000331" ], + "TATA_signal" => [ "TATA_box", "SO:0000174" ], + "attenuator" => [ "attenuator", "SO:0000140" ], + "enhancer" => [ "enhancer", "SO:0000165" ], + "exon" => [ "exon", "SO:0000147" ], + "gap" => [ "gap", "SO:0000730" ], + "gene" => [ "gene", "SO:0000704" ], + "iDNA" => [ "iDNA", "SO:0000723" ], + "intron" => [ "intron", "SO:0000188" ], + "mRNA" => [ "mRNA", "SO:0000234" ], + "mat_peptide" => [ "mature_protein_region", "SO:0000419" ], + "misc_RNA" => [ "transcript", "SO:0000673" ], + "misc_binding" => [ "binding_site", "SO:0000409" ], + "misc_difference" => [ "sequence_difference", "SO:0000413" ], + "misc_feature" => [ "region", "SO:0000001" ], + "misc_recomb" => [ "recombination_feature", "SO:0000298" ], + "misc_signal" => [ "regulatory_region", "SO:0005836" ], + "misc_structure" => [ "sequence_secondary_structure", "SO:0000002" ], + "modified_base" => [ "modified_DNA_base", "SO:0000305" ], + "operon" => [ "operon", "SO:0000178" ], + "oriT" => [ "origin_of_transfer", "SO:0000724" ], + "polyA_signal" => [ "polyA_signal_sequence", "SO:0000551" ], + "polyA_site" => [ "polyA_site", "SO:0000553" ], + "precursor_RNA" => [ "primary_transcript", "SO:0000185" ], + "prim_transcript" => [ "primary_transcript", "SO:0000185" ], + "primer_bind" => [ "primer_binding_site", "SO:0005850" ], + "promoter" => [ "promoter", "SO:0000167" ], + "protein_bind" => [ "protein_binding_site", "SO:0000410" ], + "rRNA" => [ "rRNA", "SO:0000252" ], + "repeat_region" => [ "repeat_region", "SO:0000657" ], + "repeat_unit" => [ "repeat_unit", "SO:0000726" ], + "satellite" => [ "satellite_DNA", "SO:0000005" ], + "scRNA" => [ "scRNA", "SO:0000013" ], + "sig_peptide" => [ "signal_peptide", "SO:0000418" ], + "snRNA" => [ "snRNA", "SO:0000274" ], + "snoRNA" => [ "snoRNA", "SO:0000275" ], + "source" => [ "contig", "SO:0000149" ], # manually modified + "stem_loop" => [ "stem_loop", "SO:0000313" ], + "tRNA" => [ "tRNA", "SO:0000253" ], + "terminator" => [ "terminator", "SO:0000141" ], + "transit_peptide" => [ "transit_peptide", "SO:0000725" ], + "variation" => [ "sequence_variant", "SO:0000109" ], +); + + +sub get_gff3_file { + my ($self) = @_; + my @output; + for my $header_line ( @{ $self->header() } ) { + push( @output, $header_line ); + } + push( @output, join( "\t", @{ $self->get_source } ) ); + for my $data_line ( @{ $self->data() }) { + push( @output, join( "\t", @{$data_line} ) ); + } + push( @output, '##FASTA' ); + push( @output, '>' . $self->seqid() ); + my $seq = $self->sequence(); + $seq =~ s/(.{80})/$1\n/g; + push( @output, $seq ); + return join( "\n", @output ); +} + +sub escape { + my ($self, $str) = @_; + $str =~ s/,/%2C/g; + $str =~ s/=/%3D/g; + $str =~ s/;/%3B/g; + $str =~ s/\t/%09/g; + return $str; +} + +sub FT_SO_map { + my ( $self, $key ) = @_; + if ( $key_mapping{$key} ) { + my @result = @{ $key_mapping{$key} }; + return $result[0]; + } + else { + return 'region'; + } +} + +sub source_map { + my ( $self, $type ) = @_; + if ( $self->{'override_source'} ) { + return $self->{'override_source'}; + } + if ( $self->{'annotation_software'}{$type} ) { + return $self->{'annotation_software'}{$type}; + } + else { + return '.'; + } +} + +sub get_attrs { + my ( $self, %data ) = @_; + my $feature = $data{'feat'}; + my $parents_ref = $data{'parents'}; + + my %attrs = (); + $attrs{'ID'} = + $self->id_prefix() . '.' . ($self->global_feat_idx()); + + $self->global_feat_idx($self->global_feat_idx()+1); + + # Handle Identifier + my $identifier = $bio->_getIdentifier($feature); + if ( $identifier ne 'ERROR' ) { + $attrs{'Name'} = $identifier . '.' . $feature->primary_tag; + } + + # Handle parents, if there are any + if ($parents_ref) { + if ( ref($parents_ref) eq 'ARRAY' ) { + $attrs{'Parent'} = $parents_ref; + } + else { + $attrs{'Parent'} = [$parents_ref]; + } + } + + # These are otherwise "Special" keys that need to be handled differently. + if ( $feature->has_tag('note') ) { + my @notes = $feature->get_tag_values('note'); + $attrs{'Note'} = \@notes; + } + if ( $feature->has_tag('db_xref') ) { + my @dbxref = $feature->get_tag_values('db_xref'); + $attrs{'Dbxref'} = \@dbxref; + } + + # Do the rest + for my $tag ( $feature->get_all_tags() ) { + + # If not one of the specially handled ones + if ( $tag ne 'name' && $tag ne 'note' && $tag ne 'db_xref' ) { + + # If not a reserved_key + if ( !$reserved_keys{$tag} ) { + my @vals = $feature->get_tag_values($tag); + $attrs{lc($tag)} = \@vals; + } + else { + warn +"Trying to set a reserved key $tag with value $attrs{$tag}"; + } + } + } + my %response = ( + id => $attrs{'ID'}, + attr_str => $self->post_process_attribute_string(%attrs), + ); + return %response; +} + +sub post_process_attribute_string { + my ($self,%attrs) = @_; + my @parts = (); + for my $k ( keys %attrs ) { + + # IGNORED TAGS + if ( $k ne 'translation' && $k ne 'product' ) { + my $joined = + $self->escape_and_join_attribute_subpart( $attrs{$k} ); + push @parts, "$k=$joined"; + } + } + + #print STDERR join("\t",@parts),"\n"; + return join( ";", @parts ); + +} + +sub escape_and_join_attribute_subpart { + my ($self,$ref) = @_; + if ( ref($ref) eq 'ARRAY' ) { + my @attrs = @{$ref}; + return join( ",", map { $self->escape($_) } @attrs ); + } + else { #scalar + return $self->escape($ref); + } +} + +sub get_source { + my ($self) = @_; + if ( !$self->source() ) { + $self->auto_source(); + } + return $self->source(); +} + +sub auto_source { + + # Auto generate a source feature, in case there isn't one. + my ($self) = @_; + my @region = ( + $self->seqid(), + ( $self->genbank() ? 'Genbank' : 'Assembly' ), + 'contig', + 1, + $self->get_length, + '.', + '.', + '.', + sprintf( "ID=%s;Name=%s", $self->seqid(), $self->seqid() ) + . ( $self->is_circular() ? ";Is_circular=True" : "" ) + ); + $self->source(\@region); +} + +sub add_feature { + my ( $self, $feat ) = @_; + my $primary_tag = $feat->primary_tag; + if ( $primary_tag eq 'CDS' ) { + my ( $id, $data_0 ) = $self->_add_gene( feat => $feat, ); + my ($data_1) = $self->_add_feature( + feat => $feat, + parent => $id, + ); + $self->_low_level_add_feature($data_0); + $self->_low_level_add_feature($data_1); + } + elsif ( $primary_tag eq 'source' ) { + my ($data) = $self->_add_feature( feat => $feat, ); + + # YUCK. + my @z = @{$data}; + my $seqid = $self->seqid(); + $z[8] =~ s/ID=[^;]*;/ID=$seqid;/g; + $self->source( \@z ); + } + elsif ( $primary_tag ne 'gene' ) { + my ($data) = $self->_add_feature( feat => $feat, ); + $self->_low_level_add_feature($data); + } + +} + +sub _low_level_add_feature { + my ( $self, $data ) = @_; + push( @{ $self->data() }, $data ); +} + +sub _add_feature { + my ( $self, %data ) = @_; + my %attrs = + $self->get_attrs( feat => $data{feat}, parents => $data{parent} ); + my @data = ( + $self->seqid(), + $self->source_map( $data{feat}->primary_tag ), + $self->FT_SO_map( $data{feat}->primary_tag ), + $data{feat}->start, + $data{feat}->end, + '.', + ( $data{feat}->strand == 1 ? '+' : '-' ), + '.', + $attrs{attr_str} + ); + + #$self->_low_level_add_feature(\@data); + return \@data; +} + +sub _add_gene { + my ( $self, %data ) = @_; + my $id = $self->id_prefix() . '.' . ( $self->global_feat_idx()); + $self->global_feat_idx($self->global_feat_idx()+1); + my $gene_count = ++$feat_type_count{'gene'}; + my @data = ( + $self->seqid(), + $self->source_map( $data{feat}->primary_tag ), + $self->FT_SO_map('gene'), + $data{feat}->start, + $data{feat}->end, + '.', + ( $data{feat}->strand == 1 ? '+' : '-' ), + '.', + "ID=$id;Name=Gene$gene_count" + ); + + #$self->_low_level_add_feature(\@data); + return ( $id, \@data ); +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::GBK2GFF3 + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Galaxy.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Galaxy.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,284 @@ +package CPT::Galaxy; +use Moose; +use strict; +use warnings; +use Data::Dumper; +use autodie; + + +sub gen { + my ( $self, %p ) = @_; + my $parameterCollection = $p{full_options}; + my @opt_spec = @{ $parameterCollection->params() }; # This feels bad? + my %defaults = @{ $p{defaults} }; + my @outputs = @{ $p{outputs} }; + my @tests; + if(defined $p{tests} && ref $p{tests} eq 'ARRAY'){ + @tests = @{ $p{tests} }; + } + + my $optional_output_file = $p{_output_file}; + + #my @registered_outputs = @{ $outputs{'registered'} }; + my $appid = $p{appid}; + my $appname = $p{appname}; + my $appdesc = $p{appdesc}; + my $appvers = $p{appvers}; + + # Set up the XML Writer + require XML::Writer; + my $xml_writer; + if($optional_output_file){ + $xml_writer = XML::Writer->new(OUTPUT => $optional_output_file); + }else{ + $xml_writer = XML::Writer->new(); + } + + # Set up the tool element + $xml_writer->startTag( + 'tool', + id => $appid, + name => $appname, + version => $appvers, + ); + + # Add all of our sections, passing a single xml_writer around. + $self->description_section($xml_writer,$appdesc); + $self->version_section($xml_writer); + $self->stdio_section($xml_writer); + + $self->command_section($xml_writer,\@opt_spec); + $self->input_section($xml_writer,\@opt_spec); + $self->output_section($xml_writer,\@opt_spec); + + $self->help_section($xml_writer); + + $self->test_section($xml_writer, @tests); + + $xml_writer->endTag('tool'); + $xml_writer->end(); + # End of tool xml conf + + # if OOF was set to 'self', that means it's stored internally, so we should return + if(defined $optional_output_file && $optional_output_file eq 'self'){ + return $xml_writer->to_string; + } +} + +sub test_section { + my ($self, $xml_writer, @test_cases) = @_; + $xml_writer->startTag('tests'); + foreach my $test(@test_cases){ + my %test_details = %{$test}; + $xml_writer->startTag('test'); + # Each test case has: name, params, outputs + + # Params will be as they're specified on the command line, so they /should/ be okay to use in galaxy code. + my %params = %{$test_details{'params'}}; + foreach(sort(keys(%params))){ + # As written, will not handle multiply valued attributes + $xml_writer->startTag('param', + name => $_, + value => $params{$_}, + ); + $xml_writer->endTag(); + } + # outputs + my %outputs = %{$test_details{'outputs'}}; + foreach(sort(keys(%outputs))){ + # As written, will not handle multiple outputs well + # This bit of code because for every output there's a + # name you expect on the command line, and a file you + # want to compare against (galaxy mucks about with + # names so we don't have to worry about it. However, + # from the command line, we have to know the name of + # the output file we're going to produce so we can + # compare it against another copy of this file. It's + # less than ideal, but there's not much we can do. + my @output_cmp = @{$outputs{$_}}; + $xml_writer->startTag('output', + name => $_, + file => $output_cmp[1], + ); + $xml_writer->endTag(); + } + $xml_writer->endTag(); + } + $xml_writer->endTag(); +} + +sub description_section{ + my ($self, $xml_writer, $appdesc) = @_; + $xml_writer->startTag('description'); + $xml_writer->characters(sprintf('%s',$appdesc)); + $xml_writer->endTag('description'); +} + +sub version_section{ + my ($self, $xml_writer) = @_; + $xml_writer->startTag('version_command'); + $xml_writer->characters("perl $0 --version"); + $xml_writer->endTag('version_command'); +} +sub stdio_section{ + my ($self, $xml_writer) = @_; + $xml_writer->startTag('stdio'); + $xml_writer->startTag( + 'exit_code', + range => "1:", + level => "fatal", + ); + $xml_writer->endTag('exit_code'); + $xml_writer->endTag('stdio'); +} +sub command_section{ + ################### + # COMMAND SECTION # + ################### + my ($self, $xml_writer,$opt_spec_ref) = @_; + my @opt_spec = @{$opt_spec_ref}; + $xml_writer->startTag( + 'command', + interpreter => 'perl', + ); + my $command_string = join("\n", $0, '--galaxy','--outfile_supporting $__new_file_path__',''); + foreach (@opt_spec) { + if( + # not galaxy specific and we are not instructed to hide + !$_->_galaxy_specific() && $_->_show_in_galaxy() + || + # is galaxy specific and is hidden + $_->_galaxy_specific() && $_->hidden() && $_->_show_in_galaxy() + ){ + #if(!$_->hidden() || ){ + my $command_addition = $_->galaxy_command(); + if($command_addition){ + $command_string .= $command_addition . "\n"; + } + } + } + $xml_writer->characters($command_string); + $xml_writer->endTag('command'); +} +sub input_section{ + my ($self, $xml_writer,$opt_spec_ref) = @_; + my @opt_spec = @{$opt_spec_ref}; + ################# + # INPUT SECTION # + ################# + $xml_writer->startTag('inputs'); + foreach (@opt_spec) { + if( + # not galaxy specific and we are not instructed to hide + !$_->hidden() && !$_->_galaxy_specific() && $_->_show_in_galaxy() + ){ + $_->galaxy_input($xml_writer); + } + } + $xml_writer->endTag('inputs'); +} +sub output_section{ + my ($self, $xml_writer,$opt_spec_ref) = @_; + my @opt_spec = @{$opt_spec_ref}; + ################## + # OUTPUT SECTION # + ################## + $xml_writer->startTag('outputs'); + foreach (@opt_spec) { + if( + # not galaxy specific and we are not instructed to hide + !$_->_galaxy_specific() && $_->_show_in_galaxy() + ){ + $_->galaxy_output($xml_writer); + } + } + $xml_writer->endTag('outputs'); +} +sub help_section{ + my ($self, $xml_writer) = @_; + ################ + # HELP SECTION # + ################ + + $xml_writer->startTag('help'); + # Here we incur some dependencies. D: + use IPC::Run3; + my ($in,$out,$err); + use File::Temp; + my $tempfile = File::Temp->new( + TEMPLATE => 'libcpt.galaxy.tempXXXXX', + DIR => '/tmp/', + UNLINK => 1, + SUFFIX => '.html' + ); + + use File::Which; + my $pod2md = which("pod2markdown"); + if(! defined($pod2md)){ + print STDERR "pod2markdown not available. Install Pod::Markdown"; + }else{ + my @command = ('pod2markdown',$0,$tempfile); + run3 \@command, \$in, \$out, \$err; + # Pandoc + my $pandoc = which("pandoc"); + if(! defined($pandoc)){ + print STDERR "Pandoc not available, cannot convert to RST"; + }else{ + @command = ("pandoc",'-f','markdown','-t','rst', $tempfile); + run3 \@command, \$in, \$out, \$err; + if(-e $tempfile){ + unlink($tempfile); + } + $xml_writer->characters($out); + } + } + $xml_writer->endTag('help'); +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Galaxy + +=head1 VERSION + +version 1.99.4 + +=head2 gen + + require CPT::Galaxy; + my $galaxy_xml_generator = CPT::Galaxy->new(); + $galaxy_xml_generator->gen( + full_options => \@options_specification, + appdesc => $self->{'appdesc'}, + appid => $self->{'appid'}, + appname => $self->{'appname'}, + defaults => $passed_opts{'defaults'}, + outputs => $passed_opts{'outputs'}, + ); + +Generates a galaxy XML file (using XML::Writer) from the options_specification object, which is an array of +['file|f=s', "blah", {some_req => 'some_val'] and CPT::Parameter::* objects. For simplicity, the first type +is currently DEPRECATED + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/GalaxyGetOpt.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/GalaxyGetOpt.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,384 @@ +package CPT::GalaxyGetOpt; +use CPT::ParameterGroup; +use autodie; +use File::Spec::Functions qw[ catfile catdir ]; +use Carp; +no warnings; +use Moose; + +has 'appdesc' => ( is => 'rw', isa => 'Str', default => sub { 'No application description provided' } ); +has 'appid' => ( is => 'rw', isa => 'Str', default => sub { "unknown_app_$0" } ); +has 'appname' => ( is => 'rw', isa => 'Str', default => sub { "Unnamed application $0"} ); +has 'appvers' => ( is => 'rw', isa => 'Str', default => sub { '0.0.1' } ); +has 'registered_outputs' => ( is => 'rw', isa => 'HashRef' ); +has 'opt' => ( is => 'rw', isa => 'Any'); +has filetype_detector => ( + is => 'rw', + isa => 'Any', +); + + +sub getOptions { + my ( $self, %passed_opts ) = @_; + require Getopt::Long::Descriptive; + # these get pushed through, thankfully. This way we can manually check for --genenrate_galaxy_export and exit slightly earlier. + my %args = map { $_ => 1 } @ARGV; + # Sections passed to us + my @script_args = @{ $passed_opts{'options'} }; + my %defaults = @{ $passed_opts{'defaults'} }; + + + + # Output Files Stuff + my @getopt_formatted_outputs; + my %outputs; + # This is originally an array of output files. We transform to a hash for + # easier lookup + foreach(@{$passed_opts{'outputs'}}){ + # Grab name/desc/opts + my ($name, $desc, $opts) = @{$_}; + $outputs{$name} = { + description => $desc, + options => $opts, + required => 1, + }; + my %tmp_opts_fix = %{$opts}; + $tmp_opts_fix{required} = 1; + $tmp_opts_fix{validate} = 'File/Output'; + push(@getopt_formatted_outputs, [$name, $desc, \%tmp_opts_fix]); + + # Adds the {name}_files_path special parameter + my %fp_opts = %{$opts}; + $fp_opts{hidden} = 1; + $fp_opts{value} = "${name}.files_path"; + $fp_opts{default} = "${name}.files_path"; + $fp_opts{required} = 1; + $fp_opts{_galaxy_specific} = 1; + $fp_opts{validate} = "String"; + #$fp_opts{_show_in_galaxy} = 0; + my $desc2 = "Associated HTML files for $name"; + my $name2 = "${name}_files_path"; + my @files_path = ($name2, $desc2, \%fp_opts); + push(@getopt_formatted_outputs, \@files_path); + + # Adds the {name}_format special parameter + my %fp_opts3 = %{$opts}; + #$fp_opts3{hidden} = 1; + $fp_opts3{default} = $fp_opts3{"default_format"}; + $fp_opts3{validate} = "File/OutputFormat"; + $fp_opts{required} = 1; + #$fp_opts3{_galaxy_specific} = 1; + #$fp_opts3{_show_in_galaxy} = 0; + my $desc3 = "Associated Format for $name"; + my $name3 = "${name}_format"; + my @files_path3 = ($name3, $desc3, \%fp_opts3); + push(@getopt_formatted_outputs, \@files_path3); + + # Adds the {name}_files_path special parameter + my %fp_opts4 = %{$opts}; + $fp_opts4{hidden} = 1; + $fp_opts4{value} = "${name}.id"; + $fp_opts4{default} = "${name}.id"; + $fp_opts4{validate} = "String"; + $fp_opts4{_galaxy_specific} = 1; + #$fp_opts4{_show_in_galaxy} = 0; + my $desc4 = "Associated ID Number for $name"; + my $name4 = "${name}_id"; + my @files_path4 = ($name4, $desc4, \%fp_opts4); + push(@getopt_formatted_outputs, \@files_path4); + + } + $self->registered_outputs(\%outputs); + + + # Store the application's name and description + if($defaults{appdesc}){ + $self->appdesc($defaults{appdesc}); + } + if($defaults{appid}){ + $self->appid($defaults{appid}); + } + if($defaults{appname}){ + $self->appname($defaults{appname}); + } + if($defaults{appvers}){ + $self->appvers($defaults{appvers}); + } + + my $usage_desc; + if ( $self->appname() && $self->appdesc() ) { + $usage_desc = sprintf( "%s: %s\n%s [options] ", $self->appname(), $self->appdesc(), $0 ); + } + + # Individual parameter parsers + require CPT::Parameter; + # Which are stored in a collection of them + require CPT::ParameterCollection; + + + my $parameterCollection = CPT::ParameterCollection->new(); + + $parameterCollection->push_params( + [ + #['Standard Options'], + #@extra_params, + #[], + ['outfile_supporting' , 'File or folder to output to, necessary when (class == Report || multiple calls to classyReturnResults || multiple standalone output files)' , { hidden => 1, validate => 'String', default => '__new_file_path__', _galaxy_specific => 1, _show_in_galaxy => 0 }], + ['galaxy' , 'Run with galaxy-specific overrides' , {validate => 'Flag' , hidden=>1, _galaxy_specific => 1, _show_in_galaxy => 0}], + ['generate_galaxy_xml' , 'Generate a compatible galaxy-xml file. May need editing' , {validate => 'Flag' , hidden=>1, _galaxy_specific => 1, _show_in_galaxy => 0}], + [], + ['Script Options'], + ] + ); + + # If there's a default specified, we should apply that. + foreach (@script_args) { + # If it's an array, push as is, because either it's old style/label/empty + # However, this doesn't allow setting defaults for things that aren't parameters + # and it won't set defaults for old style + if ( ref $_ eq 'ARRAY' ) { + $parameterCollection->push_param($_); + }elsif( ref $_ eq 'HASH'){ + my $pG = CPT::ParameterGroup->new(); + $pG->set_data($_); + $parameterCollection->push_group($pG); + } + } + + # Our magic output files stuff + $parameterCollection->push_params( + [ + [], + ['Output Files'], + @getopt_formatted_outputs, + ] + ); + + # Other standard options like verbosity/version/etc + $parameterCollection->push_params( + [ + [], + ['Other Standard Options'], + ['verbose|v', 'Be more verbose', {validate => 'Flag', _show_in_galaxy => 0}], + ['version', 'Print version information', {validate => 'Flag', _show_in_galaxy => 0}], + ['help', 'Print usage message and exit', {validate => 'Flag', _show_in_galaxy => 0}], + ], + ); + + # If we want the galaxy_xml, do that before the reduction step is called. + if ( $args{'--version'} ) { + print $self->appvers() . "\n"; + exit 1; + } + + # If we want the galaxy_xml, do that before the reduction step is called. + if ( $args{'--generate_galaxy_xml'} ) { + require CPT::Galaxy; + my $galaxy_xml_generator = CPT::Galaxy->new(); + $galaxy_xml_generator->gen( + full_options => $parameterCollection, + appdesc => $self->appdesc(), + appid => $self->appid(), + appname => $self->appname(), + appvers => $self->appvers(), + defaults => $passed_opts{'defaults'}, + outputs => $passed_opts{'outputs'}, + tests => $passed_opts{'tests'}, + ); + exit 1; + } + + if( $args{'--gen_test'} ){ + require CPT::GenerateTests; + my $tgen = CPT::GenerateTests->new(); + if(defined $passed_opts{'tests'}){ + print $tgen->gen(@{$passed_opts{'tests'}}); + }else{ + print $tgen->gen_empty(); + } + exit 0; + } + + # Now that the options_spec is complete, we reduce to something getopt will be happy with + my @getopt_spec = $parameterCollection->getopt(); + #print STDERR Dumper \@getopt_spec; + #exit 1; + + # It would be nice if there was a way to ensure that it didn't die here... + # Execute getopt + my ( $local_opt, $usage ) = Getopt::Long::Descriptive::describe_options( $usage_desc, @getopt_spec ); + $self->opt($local_opt); + + # Now that we've gotten the user's options, we need to copy their values back to our ParameterCollection + $parameterCollection->populate_from_getopt($self->opt()); + + # If they want help, print + exit + if ( $self->opt && $self->opt->help ) { + print $usage; + exit 1; + } + # Validate their choices + if ( $parameterCollection->validate($self->opt) ) { + return $self->opt; + # Easy access for the script, don't want them to have to deal with PC (just yet) + } + else { + # Some params failed to validate, so we die. + croak "Validation errors were found so cannot continue"; + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::GalaxyGetOpt + +=head1 VERSION + +version 1.99.4 + +=head2 getOptions + + my $ggo = CPT::GalaxyGetOpt->new(); + my $options = $ggo->getOptions( + 'options' => [ + [ 'file', 'Input file', { validate => 'File/Input' } ], + [ + "option" => "Select an option!", + { + validate => 'Option', + options => \%options, + multiple => 1, + } + ], + [ + "float" => "I'm a float", + { validate => 'Float' } + ], + [ + "int" => "I'm an int", + { validate => 'Int', default => [42, 34], required => 1, multiple => 1 } + ], + [], + ['New section'], + [ + "string" => "I'm a simple string", + { validate => 'String' } + ], + [ + 'flag' => 'This is a flag', + { validate => 'Flag' } + ], + ], + 'outputs' => [ + [ + 'my_output_data', + 'Output TXT File', + { + validate => 'File/Output', + required => 1, + default => 'out', + data_format => 'text/plain', + default_format => 'TXT', + } + ], + ], + 'defaults' => [ + 'appid' => 'TemplateScript', + 'appname' => 'Template', + 'appdesc' => 'A basic template for a new CPT application', + 'appvers' => '1.94', + ], + 'tests' => [ + { + test_name => "Default", + params => { + }, + outputs => { + 'my_output_data' => ["out.txt", 'test-data/outputs/template.default.txt' ], + }, + }, + { + test_name => "Option A specified", + params => { + 'int', '10000', + }, + outputs => { + 'my_output_data' => ["out.txt", 'test-data/outputs/template.A.txt' ], + }, + }, + ], + ); + + my @data; + foreach(qw(file int string option float flag)){ + # Create a 2D array of all of our optoins + push(@data, [ $_, $options->{$_}]); + } + + my %table = ( + 'Sheet1' => { + header => [qw(Key Value)], + data => \@data, + } + ); + + # And store it to file! + use CPT::OutputFiles; + my $crr_output = CPT::OutputFiles->new( + name => 'my_output_data', + GGO => $ggo, + ); + $crr_output->CRR(data => \%table); + +Gets command line options, and prints docs. Very convenient, removes the burden of writing any Getopt code from you. + +=head3 Default Options Provided + +=over 4 + +=item * + +help, man - cause an early exit and printing of the POD for your script. + +=item * + +verbose - flag to make the script verbose. Will print everything that is sent to returnResults. Individual scripts should take advantage of this option + +=back + +=head3 Advanced Options Provided + +=over 4 + +=item C<--generate_galaxy_xml> + +Generates valid XML for the tool for use in Galaxy + +=item C<--gen_test> + +Generates code to test the script using specified test data + +=back + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/GenerateTests.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/GenerateTests.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,151 @@ +package CPT::GenerateTests; +use Moose; +use strict; +use warnings; +use autodie; + + +sub gen { + my ( $self, @tests ) = @_; + + my $test_count = 0; + + # Header + my @outtext = ( + '#!/usr/bin/env perl', + 'use strict;', + 'use warnings;', + 'use Test::More tests => 0;', + 'use IPC::Run3 qw(run3);', + '', + 'my ( @base, @cmd, $in, $out, $err );', + '', + sprintf("%s = ('perl', '%s');", '@base', $0), + 'my %result_files = (', + ); + my %test_names; + # Loop across tests + foreach my $test_ref(@tests){ + my %test = %{$test_ref}; + my %params = %{$test{params}}; + my $command_line = ""; + foreach(sort(keys(%params))){ + $command_line .= "--$_ $params{$_} "; + } + my %outputs = %{$test{outputs}}; + + $test_names{$test{test_name}}++; + if($test_names{$test{test_name}} > 1){ + printf STDERR "Duplicate test found: %s. This will cause fewer tests to be run than expected\n", $test{test_name}; + } + push(@outtext, sprintf(' "%s" => {', $test{test_name})); + push(@outtext, sprintf(' command_line => "%s",', $command_line)); + push(@outtext, ' outputs => {'); + + foreach my $key(keys %outputs){ + push(@outtext, sprintf(' "%s" => ["%s", "%s"],', $key, @{$outputs{$key}})); + # Add another test + $test_count++; + $test_count++; + } + push(@outtext, ' },'); + push(@outtext, ' },'); + }; + push(@outtext, ');'); + push(@outtext,''); + + push(@outtext, 'foreach ( keys(%result_files) ) {'); + push(@outtext, ' # run with the command line'); + push(@outtext, ' my @cmd1 = ( @base, split( / /, $result_files{$_}{command_line} ) );'); + push(@outtext, ' run3 \@cmd1, \$in, \$out, \$err;'); + push(@outtext, ' if($err){ print STDERR "Exec STDERR: $err"; }'); + push(@outtext, ' if($out){ print STDERR "Exec STDOUT $out"; }'); + push(@outtext, ' # and now compare files'); + push(@outtext, ' foreach my $file_cmp ( keys( %{$result_files{$_}{outputs}} ) ) {'); + push(@outtext, ' my ($gen, $static) = @{$result_files{$_}{outputs}{$file_cmp}};'); + push(@outtext, ' my @diff = ( "diff", $gen, $static );'); + push(@outtext, ' my ($in_g, $out_g, $err_g);'); + push(@outtext, ' run3 \@diff, \$in_g, \$out_g, \$err_g;'); + push(@outtext, ' if($err_g) { print STDERR "err_g $err_g\n"; }'); + push(@outtext, ' if($out_g) { print STDOUT "out_g $out_g\n"; }'); + push(@outtext, ' chomp $out_g;'); + push(@outtext, ' is( -e $gen, 1, "[$_] Output file must exist"); '); + push(@outtext, ' is( length($out_g), 0, "[$_] Checking validity of output \'$file_cmp\'" );'); + push(@outtext, ' unlink $gen;'); + push(@outtext, ' }'); + push(@outtext, '}'); + + # Update test counts + $outtext[3] = "use Test::More tests => $test_count;"; + if($test_count == 0){ + return $self->gen_empty(); + } + + return join("\n", @outtext); +} + +sub gen_empty { + my ( $self ) = @_; + + my $test_count = 0; + + my @outtext = ( + '#!/usr/bin/env perl', + 'use strict;', + 'use warnings;', + 'use Test::More skip_all => "No tests defined for ' . $0 .'"', + ); + return join("\n", @outtext); +} + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::GenerateTests + +=head1 VERSION + +version 1.99.4 + +=head2 gen + + require CPT::GenerateTests; + my $tgen = CPT::GenerateTests->new(); + $tgen->gen( + { + test_name => "Default", + params => { + 'file' => 't/test-files/aa.gbk', + 'chromosome' => 'test', + 'color' => 'red', + 'intensity' => 'vvvvl', + }, + outputs => { + 'result_name' => ['circos_k.txt', 'test-data/circos_k.txt'], + } + }, + ); + exit 1; + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Logger.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Logger.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,64 @@ +package CPT::Logger; +use Moose; +use strict; +use warnings; +use autodie; + +# This will eventually be merged into CPT proper. + +has 'outfile' => ( is => 'ro', isa => 'Str' ); +has 'appid' => ( is => 'ro', isa => 'Str' ); +has 'outfilehandle' => ( is => 'ro', isa => 'FileHandle' ); + +my $fh; + +sub new { + my ( $class, %options ) = @_; + my $self = {%options}; + bless( $self, $class ); + if ( $self->{'outfile'} ) { + open( $fh, '>>', $self->{'outfile'} ); + } + return $self; +} + +sub log { + my ( $self, $message ) = @_; + my @t = localtime; + $t[5] += 1900; + $t[4]++; + my $time = sprintf "%04d-%02d-%02d %02d:%02d:%02d", + @t[ 5, 4, 3, 2, 1, 0 ]; + print $fh "[$time] $message\n"; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Logger + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/OutputFiles.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/OutputFiles.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,737 @@ +package CPT::OutputFiles; +use Carp; +use Moose; +use strict; +use warnings; +use autodie; +use File::Spec; + + +# ABSTRACT: Handles script outputs in a sane way, providing facilities to format data and name files for regular use, or in galaxy. + + +# A list of acceptable ouput formats. Some/many of these may be missing implementations. +# For instance, the pandoc output format is completely unimplemented +# These will NEED to be re-worked. +has 'acceptable_formats' => ( + is => 'ro', + isa => 'HashRef', + default => sub { + { + 'text/tabular' => [qw(TSV TSV_U CSV CSV_U XLS ODS Dumper JSON YAML XLSX)], + 'genomic/annotated' => [qw(ABI Ace AGAVE ALF AsciiTree BSML BSML_SAX ChadoXML Chaos ChaosXML CTF EMBL EntrezGene Excel Exp Fasta Fastq GAME GCG Genbank Interpro KEGG LargeFasta LaserGene LocusLink PHD PIR PLN Qual Raw SCF SeqXML Strider Swiss Tab TIGR TIGRXML TinySeq ZTR)], + 'genomic/raw' => [qw(Fasta)], + 'genomic/interval' => [qw(GFF3)], + 'text/html' => [qw(HTML)], # Theoretically this will be consumed by text/report + 'text/report' => [qw(Pandoc)], + 'text/plain' => [qw(TXT CONF)], + 'image/svg' => [qw(SVG)], + 'image/png' => [qw(PNG)], + 'archive' => [qw(tar.gz zip tar)], + 'Dummy' => [qw(Dummy)], + } + } +); + +has 'format_mapping' => ( + is => 'ro', + isa => 'HashRef', + default => sub { + { + 'TSV' => 'tabular', + 'CSV' => 'tabular', + 'TSV_U' => 'tabular', + 'CSV_U' => 'tabular', + 'XLS' => 'data', + 'ODS' => 'data', + 'Dumper' => 'txt', + 'JSON' => 'txt', + 'YAML' => 'txt', + 'XLSX' => 'data', + 'Fasta' => 'fasta', + 'GFF3' => 'interval', + 'HTML' => 'html', + 'Pandoc' => 'txt', + 'TXT' => 'txt', + 'CONF' => 'txt', + 'SVG' => 'xml', + 'PNG' => 'png', + 'Dummy' => 'data', + 'tar.gz' => 'tar.gz', + 'zip' => 'zip', + 'tar' => 'tar', + #Genomic formats + 'ABI' => 'data', + 'Ace' => 'txt', + 'AGAVE' => 'xml', + 'ALF' => '', + 'AsciiTree' => 'txt', + 'BSML' => 'xml', + 'BSML_SAX' => 'xml', + 'ChadoXML' => 'xml', + 'Chaos' => 'xml', + 'ChaosXML' => 'xml', + 'CTF' => 'data', + 'EMBL' => 'txt', + 'EntrezGene' => 'txt', + 'Excel' => 'data', + 'Exp' => 'txt', + 'Fastq' => 'fastq', + 'GAME' => 'xml', + 'GCG' => 'txt', + 'Genbank' => 'txt', + 'Interpro' => 'xml', + 'KEGG' => 'txt', + 'LargeFasta' => 'txt', + 'LaserGene' => 'data', + 'LocusLink' => 'data', + 'PHD' => 'data', + 'PIR' => 'data', + 'PLN' => 'data', + 'Qual' => 'data', + 'Raw' => 'txt', + 'SCF' => 'data', + 'SeqXML' => 'xml', + 'Strider' => 'data', + 'Swiss' => 'txt', + 'Tab' => 'tabular', + 'TIGR' => 'xml', + 'TIGRXML' => 'xml', + 'TinySeq' => 'xml', + 'ZTR' => 'data', + } + } +); + +sub valid_formats { + my ($self, $format) = @_; + return ${$self->acceptable_formats()}{$format}; +} + +sub get_format_mapping{ + my ($self, $format) = @_; + return ${$self->format_mapping()}{$format}; +} + + +# User supplied options +has 'name' => ( is => 'ro', isa => 'Str' ); +has 'GGO' => ( is => 'ro', isa => 'Any' ); +has 'galaxy' => (is => 'rw', isa => 'Bool'); + +# These are extracted on init from from CPT +has 'output_id' => (is => 'rw', isa => 'Str'); +has 'output_label' => (is => 'rw', isa => 'Str'); +has 'output_opts' => (is => 'rw', isa => 'HashRef'); +# From galaxy +has 'new_file_path' => (is => 'rw', isa => 'Str'); +has 'files_path' => (is => 'rw', isa => 'Str'); +has 'files_id' => (is => 'rw', isa => 'Str'); +# ??? +has 'parent_filename' => (is => 'rw', isa => 'Str'); +has 'parent_internal_format' => (is => 'rw', isa => 'Str'); +has 'parent_default_output_format' => (is => 'rw', isa => 'Str'); + +has 'init_called' => (is => 'rw', isa =>'Bool'); + +sub initFromArgs { + my ($self, %args) = @_; + + # We will only ever care about one (as there is one of these objects + # per registered output) + my %registered_outputs = %{$self->GGO()->registered_outputs()}; + # If the output name specified in "name" was not known to registered_outputs + if(!defined($self->name())){ + croak("You must supply a name to the instantiation of CRR"); + } + #if(!defined($registered_outputs{$args{name}})){ + #croak("The script author tried to call GGO's classyReturnResults method with an output file not mentioned in the outputs section."); + #} + + # Carrying on + # We grab the pre-specified data regarding that output + my %reg_out_params = %{$registered_outputs{$self->name()}}; + # Store these for future calls of sub/var + $self->output_id($self->name()); + $self->output_label($reg_out_params{description}); + $self->output_opts($reg_out_params{options}); + + + $self->parent_internal_format($reg_out_params{options}{data_format}); + $self->parent_default_output_format($reg_out_params{options}{default_format}); + $self->parent_filename($reg_out_params{options}{default}); + + # Special variables + # --genemark "${genemark}" --genemark_format "${genemark_format}" + # --genemark_files_path "${genemark_files_path}" + # --genemark_id "${genemark_id}" + + # If they've specified a filename on the command line, that should + # override the default value + if(defined $self->GGO->opt->{$self->name()}){ + $self->parent_filename($self->GGO->opt->{$self->name()}); + } + # If they've specified a {str}_format option on the command line, that + # should override the default value + if(defined $self->GGO->opt->{$self->name() . '_format'}){ + $self->parent_default_output_format($self->GGO->opt->{$self->name() . '_format'}); + } + + # Grab supporting files path (added as new history items) + $self->new_file_path($self->GGO->opt->{outfile_supporting}); + # Copy galaxy specific variables + if(defined $self->GGO->opt->{$self->name() . '_files_path'}){ + $self->files_path($self->GGO->opt->{$self->name() . '_files_path'}); + } + if(defined $self->GGO->opt->{$self->name() . '_id'}){ + $self->files_id($self->GGO->opt->{$self->name() . '_id'}); + } + + # If --galaxy has been specified, we need to be aware of this + if ( $self->GGO->opt->{galaxy} ) { + $self->galaxy(1); + }else{ + $self->galaxy(0); + } + $self->init_called(1); +} + + + +has 'times_called' => ( is => 'rw', isa => 'Num', default => sub {0} ); +has 'naming_strategy' => ( is => 'rw', isa => 'Str', default => "norm" ); #Other options are "var" and "sub" + + +sub _genCRR { + my ($self, %args) = @_; + if(!$self->init_called()){ + $self->initFromArgs(%args); + } + + # If the user supplied a custom extension, pull that (useful in + # dummy/data output type) + if(defined $args{extension}){ + $self->extension($args{extension}); + } + + # This is a mandatory parameter + if(!defined $args{filename}){ + $self->given_filename($self->parent_filename()); + }else{ + $self->given_filename($args{filename}); + } + + # Allow overriding default format parameters + my $writer = $self->writer_for_format( + defined $args{data_format} ? $args{data_format} : $self->parent_internal_format(), + defined $args{format_as} ? $args{format_as} : $self->parent_default_output_format(), + ); + + # Ugh + $writer->OutputFilesClass($self); + if($args{'data'}){ + $writer->data( $args{'data'} ); + } + $writer->process_data(); + $writer->write(); + my @returned_filenames = @{$writer->used_filenames()}; + print STDERR join("\n",map{"FN: $_"} @returned_filenames)."\n"; + $self->bump_times_called(); + return @returned_filenames; +} + + +sub CRR { + my ( $self, %args ) = @_; + return $self->_genCRR(%args); +} + + +sub subCRR { + my ( $self, %args ) = @_; + # Change naming behaviour + $self->naming_strategy('sub'); + return $self->_genCRR(%args); +} + + +sub varCRR { + my ( $self, %args ) = @_; + # Change naming behaviour + $self->naming_strategy('var'); + return $self->_genCRR(%args); +} + + +sub bump_times_called{ + my ($self) = @_; + $self->times_called($self->times_called() + 1 ); + return $self->times_called(); +} + + +sub writer_for_format{ + my($self, $format, $requested) = @_; + + # For the specified data_format, grab the acceptable handlers for that format + my %acceptable = %{$self->acceptable_formats()}; + my %acceptable_handlers = map { $_ => 1 } @{ $acceptable{ $format } }; + + if (!$acceptable_handlers{$requested} ) { + carp(sprintf( "Unacceptable output format choice [%s] for internal" + ."data type for type %s. Acceptable formats are [%s]." + ."Alternatively, unacceptable output file.", $requested, $format, + join( ', ', keys(%acceptable_handlers) ) )); + } + + if ( $requested eq 'Dumper' ) { + require CPT::Writer::Dumper; + return CPT::Writer::Dumper->new(); + } + elsif ( $requested eq 'TSV' ) { + require CPT::Writer::TSV; + return CPT::Writer::TSV->new(); + } + elsif ( $requested eq 'CSV' ) { + require CPT::Writer::CSV; + return CPT::Writer::CSV->new(); + } + elsif ( $requested eq 'TSV_U' ) { + require CPT::Writer::TSV_U; + return CPT::Writer::TSV_U->new(); + } + elsif ( $requested eq 'CSV_U' ) { + require CPT::Writer::CSV_U; + return CPT::Writer::CSV_U->new(); + } + elsif ( $requested eq 'YAML' ) { + require CPT::Writer::YAML; + return CPT::Writer::YAML->new(); + } + elsif ( $requested eq 'JSON' ) { + require CPT::Writer::JSON; + return CPT::Writer::JSON->new(); + } + elsif ( $requested eq 'Pandoc' ) { + require CPT::Writer::Pandoc; + return CPT::Writer::Pandoc->new(); + } + elsif ( $requested eq 'XLS' ) { + require CPT::Writer::Spreadsheet::XLS; + return CPT::Writer::Spreadsheet::XLS->new(); + } + elsif ( $requested eq 'XLSX' ) { + require CPT::Writer::Spreadsheet::XLSX; + return CPT::Writer::Spreadsheet::XLSX->new(); + } + elsif ( $requested eq 'TXT' || $requested eq 'CONF' ) { + require CPT::Writer::TXT; + return CPT::Writer::TXT->new(); + } + elsif ( $acceptable_handlers{$requested} && $format eq 'genomic/annotated'){ + require CPT::Writer::Genomic; + return CPT::Writer::Genomic->new(format => $requested); + } + elsif ( $requested eq 'Fasta' ) { + require CPT::Writer::Fasta; + return CPT::Writer::Fasta->new(); + } + elsif ( $requested eq 'GFF3' ) { + require CPT::Writer::GFF3; + return CPT::Writer::GFF3->new(); + } + elsif ( $requested eq 'HTML' ) { + require CPT::Writer::HTML; + return CPT::Writer::HTML->new(); + } + elsif ( $requested eq 'SVG' ) { + require CPT::Writer::SVG; + return CPT::Writer::SVG->new(); + } + elsif ( $requested eq 'PNG' ) { + require CPT::Writer::Dummy; + return CPT::Writer::Dummy->new(); + } + elsif ( $requested eq 'Dummy' ) { + require CPT::Writer::Dummy; + return CPT::Writer::Dummy->new(); + } + elsif ( $requested eq 'tar.gz') { + require CPT::Writer::Archive; + return CPT::Writer::Archive->new( format => 'tar.gz' ); + } + elsif ( $requested eq 'zip') { + require CPT::Writer::Archive; + return CPT::Writer::Archive->new( format => 'zip' ); + } + elsif ( $requested eq 'tar') { + require CPT::Writer::Archive; + return CPT::Writer::Archive->new( format => 'tar' ); + } + else { + carp(sprintf("Data Format not yet supported [%s, %s]", $format, $requested)); + } +} + +# File extension +has 'extension' => ( is => 'rw', isa => 'Str'); +# What the user said this file was called. +has 'given_filename' => (is => 'rw', isa => 'Str'); + + + + +sub generate_galaxy_variable{ + my ($self) = @_; + unless( -d $self->new_file_path()){ + mkdir($self->new_file_path()); + } + my $filename =File::Spec->catfile( + $self->new_file_path(), + sprintf( "primary_%s_%s_visible_%s", $self->files_id(), $self->given_filename(), $self->extension()) + ); + return $filename; +} + + +sub generate_nongalaxy_variable{ + my ($self) = @_; + my $filename =File::Spec->catfile( + sprintf( "%s.%s", $self->given_filename(), $self->extension()) + ); + return $filename; +} + + +sub generate_galaxy_subfile { + my ($self) = @_; + unless( -d $self->files_path()){ + mkdir($self->files_path()); + } + my $filename =File::Spec->catfile( + $self->files_path, + sprintf( "%s.%s", $self->given_filename(), $self->extension()) + ); + return $filename; +} + + +sub generate_nongalaxy_subfile { + my ($self) = @_; + # they're pretty much equivalent for now + return $self->generate_galaxy_subfile(); +} + + +sub get_next_file{ + my ($self) = @_; + my $filename; + if ( $self->galaxy() ) { + # In which case we want to return the primary output file. + if ( $self->times_called() == 0 ) { + $filename = $self->parent_filename(); + } + else { + if ( $self->naming_strategy eq 'sub' ) { + $filename = $self->generate_galaxy_subfile(); + } + elsif($self->naming_strategy eq 'var') { + $filename = $self->generate_galaxy_variable(); + }else{ + confess("Unknown startegy for multiple output files: " . $self->naming_strategy()); + } + } + } + else # do NOT use galaxy overrides. Paths should be more...sane + { + # First time we request, should $filename = the primary value, which + # should be the file they specify. + if ( $self->times_called() == 0 ) { + $filename = $self->given_filename() . '.' . $self->extension(); + } + else { + if ( $self->naming_strategy eq 'sub' ) { + $filename = $self->generate_nongalaxy_subfile(); + } + elsif($self->naming_strategy eq 'var') { + $filename = $self->generate_nongalaxy_variable(); + }else{ + confess("Unknown startegy for multiple output files: " . $self->naming_strategy()); + } + } + } + return $filename; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::OutputFiles - Handles script outputs in a sane way, providing facilities to format data and name files for regular use, or in galaxy. + +=head1 VERSION + +version 1.99.4 + +=head1 METHODS + +=head2 initFromArgs + + $o->initFromArgs(name => 'GGO_known_output_name', GGO => $GGO); + +Internal method to intialise data structures from the output id provide in C and the data accessible via the C object. You B have already called C<< $GGO->getOptions >> + +=head2 classyReturnResults + + # in $GGO->getOptions( + outputs => [ + ['html_page', 'HTML output page', + { + validate => 'File/Output', + default => 'aa', # will produce aa.html + data_format => 'text/html', + default_format => 'HTML' + } + ] + ['genbank_download', 'Variable number of GBK files', + { + validate => 'File/Output', + default => 'result', # will produce result.gbk + data_format => 'genomic/annotated', + default_format => 'Genbank' + } + ] + ] + # ) + + # Then in your script + $csv_output = CPT::OutputFiles->new( + name => 'html_page', + opt => $options, + ); + $csv_output->CRR( + data => $data + ); + # Subfile + my $loc = $csv_output->subCRR( + filename => 'cool_picture', + data_format=>'data', + extension=>"png" + ); + move($png_file,$loc); + + # You give subfiles a name in case you need to refer to them at any + # point in the parent file. + $csv_output->subCRR( + filename => 'output', + data => $svg_object, + data_format => 'image/svg', + format_as => 'SVG' + ); + + + $gbk_output = CPT::OutputFiles->new( + name => 'genbank_download', + opt => $options, + ); + while(my $individual_genbank = $large_seqio->next){ + $gbk_output->varCRR( + filename => $individual_genbank->seqid(), + data => $individual_genbank, + ); + } + +=head2 _genCRR + + _genCRR(extension => 'png', data => $data_ref, data_format => 'Dummy', + format_as => 'Dummy', filename => "my-image"); + +This is an internal method and should not be called directly. It's the end call of all C, C, and C. Those methods should be used instead. + +This method + +=over 4 + +=item Stores some parameters + +Specifically C, C, C, C + +=item Creates a CPT::Writer + +=item Calls the writer's C method + +=item returns an array (not arrayref) of filenames + +These were the filenames that were produced in the writing process. This may be useful for data like CSV data where the output writer may produce N differently named files for each sheet of data. + +=back + +=head2 CRR + + $o->CRR(data => $ref); + +Writes data to an appropriately named file. (This is usually the "default" parameter supplied in the definition of this output). You should call this method first. + +=head2 subCRR + + $o->subCRR(data => $ref, filename => 'subreport', extension => 'html', data_format => 'text/html', format_as => 'HTML'); + +Writes data to an appropriately named sub file. A subfile is a file that will appear in a folder in the current directory. Subfiles are useful when you want to reference other output files in a primary HTML output or similar. C gives you a method to produce files and have them automatically placed in a sensible location, from which you can reference the files. + +Files are placed in C<< $self->files_path >>. We C this for you, ignoring any errors. If you're paranoid you might want to re-run the mkdir/test for permissions/etc. + +You must provide + +=over 4 + +=item filename + +name for the output file. You must generate this or it will be named identically to the parent. (And if you call it twice they will clobber each other silently and without mercy) + +=item extension + +E.g., 'png' + +=item data_format + +Internal data type. One of the standard C, C, C, etc. + +=item format_as + +You're welcome to provide a way to access the format parameter of subfiles to your users, however this is not done for you as there is no way for this module to know ahead of time how many subfiles you will produce. + +=back + +You may call this method after the first call to CRR or instead of calls to CRR + +=head2 varCRR + + $o->varCRR(data => $ref, filename => 'subreport', extension => 'html', data_format => 'text/html', format_as => 'HTML'); + +Writes data to an appropriately named var file. A var file or variable file is much like a subfile, except that in galaxy they will show up as individual history items. Additionally, the default behaviour from the command line is to place all generated files in the current working directory, rather than in a special folder. + +You must provide + +=over 4 + +=item filename + +name for the output file. You must generate this or it will be named identically to the parent. (And if you call it twice they will clobber each other silently and without mercy) + +=item extension + +E.g., 'png' + +=item data_format + +Internal data type. One of the standard C, C, C, etc. + +=item format_as + +You're welcome to provide a way to access the format parameter of subfiles to your users, however this is not done for you as there is no way for this module to know ahead of time how many subfiles you will produce. + +=back + +You may call this method after the first call to CRR or instead of calls to CRR + +=head2 bump_times_called + + $o->bump_times_called(); + +Bumps the internal number representing the number of times you've tried to output files for a given output object. This data is used in construction of filenames + +=head2 writer_for_format + + $o->writer_for_format('text/tabular', 'TSV_U'); + +Get the appropriate writer class and instantiate it for a given C and C. + +=head2 generate_galaxy_variable + + $o->generate_galaxy_variable(); + +If we need the files to show up as separate History items in galaxy, filenames have to be constructed like this: + +=over 4 + +=item F<$filepath/primary_546_output2_visible_bed> + +=item F<$filepath/primary_546_output3_visible_pdf> + +=back + +where filenames consist of 'primary', an ID number (provided in C on the command lien), a filename, 'visible', and an extension, all joined with C<_>. Additionally C<$filepath> is generally CWD (I think...) + +=head2 generate_nongalaxy_variable + + $o->generate_nongalaxy_variable(); + +=over 4 + +=item F<$given_filename.$extension> + +=back + +Parameters are taken from the object variables of the same names. + +=head2 generate_galaxy_subfile + + $o->generate_galaxy_subfile(); + +=over 4 + +=item F<$files_path/$given_filename.$extension> + +=back + +The paths for images and other files will end up looking something like +F with the galaxy provided C prepended to the filename. + +=head2 generate_nongalaxy_subfile + + $o->generate_nongalaxy_subfile(); + +See L. Know that the default for C<< $self->files_path >> is C<"outputname.files_path">. It's only "special" when run from inside galaxy. + +=head2 get_next_file + + $o->get_next_file(); + +If it's the first time this method has been called, it constructs a default filename. If the C variable is true, then it's just whatever value was passed. Otherwise it's just C and C put together. C is taken from C. + +If it's not the first time it was called, this module expects you to be using L or L to call (which has set C). Those will generate appropriate filenames with calls to one of + +=over 4 + +=item L + +=item L + +=item L + +=item L + +=back + +based on appropriate variables. + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,477 @@ +package CPT::Parameter; +use Moose::Role; +use strict; +use warnings; +use autodie; +use Carp; + +#requires 'galaxy_command'; +requires 'galaxy_input'; +requires 'galaxy_output'; +requires 'validate_individual'; +requires 'getopt_format'; + +# Long name for this parameter (mandatory) +has 'name' => ( is => 'rw', isa => 'Str' ); + +# Short name for this paramter (optional) +has 'short' => ( is => 'rw', isa => 'Str' ); +has 'multiple' => ( is => 'rw', isa => 'Bool' ); +has 'description' => ( is => 'rw', isa => 'Str' ); + +# Attr +# Default supplied parameters +has 'default' => ( is => 'rw', isa => 'Any' ); +# User supplied values +has 'value' => ( is => 'rw', isa => 'Any' ); +has 'required' => ( is => 'rw', isa => 'Bool' ); +has 'hidden' => ( is => 'rw', isa => 'Bool' ); + +# Set of error messages to be returned +has 'errors' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); + +# Unimplemented +# Are there any implications of setting this +has 'implies' => ( is => 'rw', isa => 'ArrayRef' ); + +# Internal +has '_index' => ( is => 'rw', isa => 'Int', default => 0 ); + +# Galaxy Specific +has '_galaxy_specific' => (is => 'rw', isa => 'Bool', default => 0); +# implies option is somehow intertwined with whether or not this is being produced for use in galaxy. +has '_show_in_galaxy' => (is => 'rw', isa => 'Bool', default => 1); +# This is a custom override. If the object is hidden by default, it will causae it to be shown. If the object is visible by default, it can cause it to be hidden. + + + +sub galaxy_command { + my ($self) = @_; + my $value = $self->get_galaxy_command_identifier(); + + # If it's hidden, specific to galaxy, and hidden from galaxy users, + # then it is safe to assume we've specified a SANE default. + if($self->hidden() && $self->_galaxy_specific()){ + $value = $self->default(); + } + my $string; + + # If it's a repeat, we handle that + $string .= $self->handle_possible_galaxy_command_repeat_start(); + # If it's required we set it to a value IF we have one. Otherwise value + # will be the galaxy_identifier. + if($self->required()){ + $string .= sprintf( '--%s "${%s}"' . "\n", + $self->get_galaxy_cli_identifier(), $value + ); + }else{ + # If + # This code is only relevant if we're multiple, otherwise the loop will + # not pass here + if ( !$self->multiple() ){ + $string .= sprintf('#if $%s and $%s is not "None":' . "\n", + $self->get_galaxy_cli_identifier(), + $self->get_galaxy_cli_identifier() + ); + } + # Flag + $string .= sprintf( '--%s "${%s}"'."\n", + $self->get_galaxy_cli_identifier(), + $value + ); + # End + if ( !$self->multiple() ){ + $string .= "#end if\n"; + } + } + $string .= $self->handle_possible_galaxy_command_repeat_end(); + return $string; +} + + + +sub getOptionsArray { + my ($self) = @_; + my @getoptions; + push( @getoptions, $self->getopt_identifier() ); + + my $mod_desc = $self->description(); + if(defined $self->default()){ + if(ref $self->default() eq 'ARRAY'){ + $mod_desc .= sprintf(" (Default: %s)", join(",",@{$self->default()})); + }else{ + $mod_desc .= sprintf(" (Default: %s)", $self->default()); + } + } + if(substr(blessed($self),0,22) eq 'CPT::Parameter::Option'){ + my %kv = %{$self->options()}; + my @k = keys(%kv); + $mod_desc .= sprintf(" (Options: %s)", + join( + ", ", + map { $kv{$_} . " [$_]" } @k + ) + ); + }else{ + } + + push( @getoptions, $mod_desc ); + + # Values to copy over: required, hidden, default, values + my %attr = (); + if ( $self->required() ) { + $attr{required} = $self->required(); + } + if ( $self->hidden() ) { + $attr{hidden} = $self->hidden(); + } + if ( $self->default() ) { + $attr{default} = $self->default(); + } + push( @getoptions, \%attr ); + return \@getoptions; +} + + +sub getopt_identifier { + my ($self) = @_; + if ( defined( $self->short() ) && length($self->short()) > 0 ) { + return sprintf( "%s|%s%s%s", $self->name(), $self->short(), $self->getopt_format(), ( $self->multiple() ? '@' : '' ), ); + } + else { + return sprintf( "%s%s%s", $self->name(), $self->getopt_format(), ( $self->multiple() ? '@' : '' ), ) + + } +} + + +sub get_galaxy_command_identifier { + my ($self) = @_; + if($self->multiple()){ + return sprintf('%s.%s', $self->get_repeat_idx_name(), $self->get_galaxy_cli_identifier()); + }else{ + return $self->get_galaxy_cli_identifier(); + } +} + + +sub get_galaxy_cli_identifier { + my ($self) = @_; + return $self->name(); +} + + +sub is_optional { + my ($self) = @_; + # Want coerced to int. + #return !$self->required(); + if($self->required()){ + return 0; + }else{ + return 1; + } +} + + +sub is_optional_galaxy { + my ($self) = @_; + return $self->is_optional() ? "True" : "False"; +} + + +sub update_index { + my ($self) = @_; + if($self->multiple()){ + my $size = scalar( @{ $self->value() } ); + # E.g: + # [1,2,3] , size = 3 + # index = 3 + # size = 3-1 = 2 + # index -> 0 + if ( $self->_index() ge $size - 1 ) { + $self->_index(0); + } + else { + $self->_index( $self->_index() + 1 ); + } + } +} + + +sub reset_index { + my ($self) = @_; + $self->_index(0); +} + + +sub get_value { + my ($self) = @_; + if ( defined $self->value() ) { + if ( $self->multiple ) { + my @data = @{ $self->value() }; + return $data[ $self->_index() ]; + } + else { + return $self->value(); + } + }else{ + return; + } +} + + +sub get_default { + my ($self) = @_; + if ( defined $self->default() ) { + if ( $self->multiple ) { + my @data = @{ $self->default() }; + return $data[ $self->_index() ]; + } + else { + return $self->default(); + } + }else{ + return; + } +} + + + +sub validate { + my ($self) = @_; + if ( $self->multiple() ) { + my $errors = 0; + if( ref($self->value()) ne 'ARRAY' ){ + carp "Author specified a non-array default value for " . $self->name() . ", which allows multiple values. Script author should modify the default value to be an ArrayRef."; + } + for my $val ( @{ $self->value() } ) { + if($self->validate_individual($val) == 0){ + $errors++; + } + } + # Must cast to number otherwise it returns "" which is bad since I use + # 1/0 as T/F (true = good, false = bad) + return 0+($errors == 0); + } + else { + return 0+$self->validate_individual($self->value()); + } +} + + +sub get_repeat_idx_name { + my ($self) = @_; + return 'item'; +} + + +sub get_repeat_name { + my ($self) = @_; + if($self->multiple()){ + return sprintf('repeat_%s', $self->get_galaxy_cli_identifier()); + }else{ + confess "Tried to get repeat name for non-multiple item"; + } +} + + +sub handle_possible_galaxy_input_repeat_start { + my ($self, $xml_writer ) = @_; + if ( $self->multiple() ) { + my $title = $self->get_galaxy_cli_identifier(); + $title =~ s/_/ /g; + # Convert To Title Case (http://www.davekb.com/browse_programming_tips:perl_title_case:txt) + $title =~ s/(\w+)/\u\L$1/g; + $xml_writer->startTag( + 'repeat', + 'name' => $self->get_repeat_name(), + 'title' => $title, + ); + } +} + + +sub handle_possible_galaxy_input_repeat_end { + my ($self, $xml_writer ) = @_; + if ( $self->multiple() ) { + $xml_writer->endTag('repeat'); + } +} + + + +sub handle_possible_galaxy_command_repeat_start { + my ( $self ) = @_; + if($self->multiple()){ + return sprintf("#for \$%s in \$%s:\n", + $self->get_repeat_idx_name(), + $self->get_repeat_name() + ); + }else{ + return ''; + } +} + + +sub handle_possible_galaxy_command_repeat_end { + my ( $self ) = @_; + if($self->multiple()){ + return "#end for\n"; + }else{ + return ''; + } +} + +sub get_default_input_parameters { + my ( $self, $type ) = @_; + my %params = ( + name => $self->get_galaxy_cli_identifier(), + optional => $self->is_optional_galaxy(), + label => $self->get_galaxy_cli_identifier(), + help => $self->description(), + type => $type, + ); + + # Multiple values would return ARRAY(0xAAAAAAA) locations, so we have to + # handle those semi-intelligently until galaxy can handle default values + # for repeats + if($self->multiple() && defined $self->default()){ + if(ref($self->default()) ne 'ARRAY'){ + carp "Author specified a non-array default value for " . $self->name() . ", which allows multiple values. Script author should modify the default value to be an ArrayRef."; + } + $params{value} = ${$self->default}[0]; + }elsif(!$self->multiple() && defined $self->default()){ + $params{value} = $self->default(); + } + + return %params; +} + +no Moose::Role; + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_command + + $file_param->galaxy_command(); # where $file_param is a CPT::Parameter::* + +Returns the portion of the command used in the block in galaxy XML files + +=head2 getOptionsArray + +When called on a CPT::Parameter::* object, it will collapse the object into a GetOpt::Long compatible array + +=head2 getopt_identifier + +Used for backwards compatability with existing defaults => { 'file|f=s' => "Blah" } format + +=head2 get_galaxy_identifier + +Returns the identifier associated with a given variable. This identifier is what the Cheetah template knows the variable as (given the correct context). + +For non-multiple variables it should be the name of the variable. + +For multiple variables it will reference the repeat item name and then the variable name (e.g., C< $item.label >) + +=head2 get_galaxy_cli_identifier + +Returns the command line identifier (i.e., the command line flag) associated +with a given parameter. For a `--format` flag, this would return "format". +This should work out of the box, as CLI parameters have the same name as we +specify them with (even if they're repeated) + +=head2 is_optional + +If required, it is NOT optional; If not reqiured, it IS optional + +=head2 is_optional_galaxy + +Returns is_optional() as "True" or "False" for convenience and reduced code duplication + +=head2 update_index + +Convenience method to increment the index. This wraps around. + +=head2 reset_index + +convenience method to zero the index (i.e., the next get_value request will start at the beginning again) + +=head2 get_value + +Returns the value in the current index. + +=head2 get_default + +Returns the default in the current index. Something to note, please bear in +mind this you are trying to access an array based on an index which wraps +according to value() not according to default(). This means you may not reach +the end of default/reach over the end of default depending on how many values +the user actually passes + +=head2 validate + +Validation logic was eventually moved out here, as the logic for validaton is +identical everywhere, and requires slightly different behaviour based on +wheterh or not it's a single/multiple valued item. + +=head2 get_repeat_idx_name + +Function to obtain the name of the item as it is called inside the repeat. This +is necessary to know which variable we are referring to within a loop. + +=head2 get_repeat_name + +Function to obtain the name of the repeat. It is necessary that this is used +identically in the command section as well as in the input section. + +=head2 handle_possible_galaxy_input_repeat_start + +If the feature is repeated, this should automatically handle the start of that +repeat + +=head2 handle_possible_galaxy_input_repeat_end + +If the feature is repeated, this should automatically handle the end of that +repeat + +=head2 handle_possible_galaxy_command_repeat_start + +If the feature is repeated, this should automatically handle the start of that +repeat with a + + #for $item in $repeat_name: + +=head2 handle_possible_galaxy_command_repeat_end + +If the feature is repeated, this should automatically handle the end of that +repeat with + + #end for + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Empty.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Empty.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,56 @@ +package CPT::Parameter::Empty; +use Moose; +with 'CPT::Parameter'; + +has 'name' => (is => 'rw', isa => 'Any'); + +sub getOptionsArray{ + my ($self) = @_; + return []; +} +sub validate_individual{ + return 1; +} +sub galaxy_input{ + return; +} +sub galaxy_output{ + return; +} +sub galaxy_command{ + return; +} +sub getopt_format{ + return ''; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Empty + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/File/Input.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/File/Input.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,98 @@ +package CPT::Parameter::File::Input; +use Moose; +with 'CPT::Parameter'; + +has 'format' => ( is => 'rw', isa => 'Str' ); +has 'file_format' => ( is => 'rw', isa => 'ArrayRef' ); + +# Format is something like "fastq", "bam", etc. +# This means when you write the parameter string it'll be +# ['file|f=s', 'Input Bam File', { required => 1, format=> 'file/input/bam'} ] +# Which will specify that you require a BAM file. + + +sub galaxy_input { + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('data'); + if( defined $self->file_format() ){ + $params{format} = join( ',', @{$self->file_format()} ); + } + $xml_writer->startTag( + 'param', + %params + ); + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + my ($self, $xml_writer) = @_; + return $xml_writer; +} + + +sub validate_individual { + my ($self, $val) = @_; + + # Maybe do format validation here? Maybe? + if ( -e $val ) { + return 1; + }else{ + push(@{$self->errors()}, sprintf( "File [%s] supplied to option %s does not exist", $val,$self->name())); + return 0; + } +} + + +sub getopt_format { + return '=s'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::File::Input + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/File/Output.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/File/Output.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,153 @@ +package CPT::Parameter::File::Output; +use Moose; +with 'CPT::Parameter'; +use CPT::OutputFiles; + +# Has the user requested that the format is ALWAYS of a specific type. This is +# useful when (e.g,.) CSV output is required because it's part of a pipeline. +# Of course, in a perfect world that wouldn't be necessary as we'd be able to +# read in data and the only constraint would be that it was "text/tabular" and +# magically we'd have a hash just like we would with CSV. Sigh.... +has 'hardcoded' => ( is => 'rw', isa => 'Bool' ); +# The format of the internal data structure that we're pushing to output +# See CPT.pm for a list of these (under %acceptable) +has 'data_format' => ( is => 'rw', isa => 'Str' ); +has 'default_format' => ( is => 'rw', isa => 'Str' ); + +# registered => ['text/tabular~CSV', 'text/plain=TXT'], +has 'registered_types' => ( is => 'rw', isa => 'ArrayRef' ); +has 'cpt_outputfile_data_access' => ( is => 'ro', isa => 'Any', default => sub { CPT::OutputFiles->new() } ); + + + +sub galaxy_input { + + # Required by our parent. For an output file, this is non-functional + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('select'); + $params{label} = 'Format of ' . $self->get_galaxy_cli_identifier(), + $params{name} = sprintf( "%s_%s", $self->get_galaxy_cli_identifier, 'format' ), + # Remove any default values for galaxy + delete $params{value}; + $xml_writer->startTag( + 'param', + %params, + ); + + if(defined $self->data_format()){ + + foreach ( sort @{ $self->cpt_outputfile_data_access()->valid_formats($self->data_format()) } ) { + my %p = (value => $_); + if($_ eq $self->default_format()){ + $p{selected} = 'True'; + } + $xml_writer->startTag( 'option', %p ); + $xml_writer->characters( $_ ); + $xml_writer->endTag('option'); + } + }else{ + $xml_writer->startTag( 'option', value => 'data', selected => 'True' ); + $xml_writer->characters( 'data' ); + $xml_writer->endTag('option'); + } + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + my ( $self, $xml_writer ) = @_; + my $format; + if(defined $self->default_format()){ + $format = $self->default_format(); + }else{ + $format = 'data'; + } + + $xml_writer->startTag( + 'data', + name => $self->get_galaxy_cli_identifier(), + format => $format, + ); + + if ( !$self->hardcoded() ) { + $xml_writer->startTag('change_format'); + # Otherwise it's still going to be set as the default_format so we're not toooo worried. + if(defined($self->data_format())){ + my @galaxy_formats = @{ $self->cpt_outputfile_data_access()->valid_formats($self->data_format()) }; + foreach (sort @galaxy_formats) { + $xml_writer->startTag( + 'when', + input => sprintf( "%s_%s", $self->get_galaxy_cli_identifier, 'format' ), + value => $_, + format => $self->cpt_outputfile_data_access()->get_format_mapping($_), + ); + $xml_writer->endTag('when'); + } + } + $xml_writer->endTag('change_format'); + } + $xml_writer->endTag('data'); +} + +sub validate_individual { + my ($self, $val) = @_; + #if(! -e $self->value()){ + # return 1; + #} + #return 0; + return 1; +} + + +sub getopt_format { + return '=s'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::File::Output + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/File/OutputFormat.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/File/OutputFormat.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,73 @@ +package CPT::Parameter::File::OutputFormat; +use Moose; +with 'CPT::Parameter'; + + + +sub galaxy_input { + + # Required by our parent. For an output file, this is non-functional + my ( $self, $xml_writer ) = @_; +} + + +sub galaxy_output { + my ( $self, $xml_writer ) = @_; +} + +sub validate_individual { + my ($self, $val) = @_; + return 1; +} + + +sub getopt_format { + return '=s'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::File::OutputFormat + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Flag.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Flag.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,139 @@ +package CPT::Parameter::Flag; +use Moose; +with 'CPT::Parameter'; + + + +sub galaxy_command { + my ($self) = @_; + my $value = $self->get_galaxy_command_identifier(); + + # If it's hidden, specific to galaxy, and hidden from galaxy users, + # then it is safe to assume we've specified a SANE default. + if($self->hidden() && $self->_galaxy_specific()){ + $value = $self->default(); + } + my $string; + + # If it's a repeat, we handle that + $string .= $self->handle_possible_galaxy_command_repeat_start(); + # If it's required we set it to a value IF we have one. Otherwise value + # will be the galaxy_identifier. + if($self->required()){ + $string .= sprintf( '--%s' . "\n", + $self->get_galaxy_cli_identifier() + ); + }else{ + # If + # This code is only relevant if we're multiple, otherwise the loop will + # not pass here + if ( !$self->multiple() ){ + $string .= sprintf('#if $%s:' . "\n", + $self->get_galaxy_cli_identifier() + ); + } + # Flag + $string .= sprintf( '--%s'."\n", + $self->get_galaxy_cli_identifier(), + ); + # End + if ( !$self->multiple() ){ + $string .= "#end if\n"; + } + } + $string .= $self->handle_possible_galaxy_command_repeat_end(); + return $string; +} + + + +sub galaxy_input { + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('boolean'); + $params{falsevalue} = 'False'; + $params{truevalue} = 'True'; + if($self->default()){ + $params{checked} = 'True'; + }else{ + $params{checked} = ''; + } + # Remove value since we use "checked" here + delete $params{value}; + + $xml_writer->startTag( + 'param', + %params, + ); + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + +} + + +sub validate_individual { + my ($self) = @_; + return 1; +} + + +sub getopt_format { + return ''; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Flag + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_command + + $file_param->galaxy_command(); # where $file_param is a CPT::Parameter::* + +Returns the portion of the command used in the block in galaxy XML files + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Float.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Float.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,106 @@ +package CPT::Parameter::Float; +use Scalar::Util qw(looks_like_number); +use Moose; +with 'CPT::Parameter'; + +has 'min' => ( is => 'rw', isa => 'Num' ); +has 'max' => ( is => 'rw', isa => 'Num' ); + + +sub galaxy_input { + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('float'); + + if(defined $self->min()){ + $params{min} = $self->min(); + } + if(defined $self->max()){ + $params{max} = $self->max(); + } + + $xml_writer->startTag( + 'param', + %params + ); + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + +} + + +sub validate_individual { + my ($self, $value) = @_; + if ( looks_like_number( $value ) ) { + # Check bounds + if ( defined $self->max() && $value > $self->max() ) { + push(@{$self->errors()}, sprintf( "Value passed with %s was greater than the allowable upper bound. [%s > %s]", $self->name(), $value, $self->max() )); + return 0; + } + if ( defined $self->min() && $value < $self->min() ) { + push(@{$self->errors()}, sprintf( "Value passed with %s was smaller than the allowable minimum bound. [%s < %s]", $self->name(), $value, $self->min() )); + return 0; + } + return 1; + } + else { + push(@{$self->errors()}, sprintf( "Value passed with %s does not look like a float [%s]", $self->name(), $value )); + return 0; + } +} + + +sub getopt_format { + return '=s'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Float + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Int.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Int.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,105 @@ +package CPT::Parameter::Int; +use Scalar::Util qw(looks_like_number); +use Moose; +with 'CPT::Parameter'; + +has 'min' => ( is => 'rw', isa => 'Int' ); +has 'max' => ( is => 'rw', isa => 'Int' ); + + + +sub galaxy_input { + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('integer'); + + if(defined $self->min()){ + $params{min} = $self->min(); + } + if(defined $self->max()){ + $params{max} = $self->max(); + } + $xml_writer->startTag( + 'param', + %params, + ); + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + +} + +sub validate_individual { + my ($self, $value) = @_; + if ( looks_like_number( $value ) ) { + # Check bounds + if ( defined $self->max() && $value > $self->max() ) { + push(@{$self->errors()}, sprintf( "Value passed with %s was greater than the allowable upper bound. [%s > %s]", $self->name(), $value, $self->max() )); + return 0; + } + if ( defined $self->min() && $value < $self->min() ) { + push(@{$self->errors()}, sprintf( "Value passed with %s was smaller than the allowable minimum bound. [%s < %s]", $self->name(), $value, $self->min() )); + return 0; + } + return 1; + } + else { + push(@{$self->errors()}, sprintf( "Value passed with %s does not look like a float [%s]", $self->name(), $value )); + return 0; + } +} + + +sub getopt_format { + return '=i'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Int + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Label.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Label.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,58 @@ +package CPT::Parameter::Label; +use Moose; +with 'CPT::Parameter'; + +has 'label' => (is => 'rw', isa => 'Str'); +has 'name' => (is => 'rw', isa => 'Any'); + +sub getOptionsArray{ + my ($self) = @_; + return [$self->label()]; +} +sub validate_individual{ + return 1; +} + +sub galaxy_input{ + return; +} +sub galaxy_output{ + return; +} +sub galaxy_command{ + return; +} +sub getopt_format{ + return ''; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Label + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Option.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Option.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,100 @@ +package CPT::Parameter::Option; +use Moose::Role; +use strict; +use warnings; +use autodie; +with 'CPT::Parameter'; + +has 'options' => ( is => 'rw', isa => 'HashRef' ); +# stored as {short => "some long text"} + + +sub galaxy_input { + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('select'); + $xml_writer->startTag( + 'param', + %params, + ); + my %options = %{ $self->options() }; + foreach ( sort( keys(%options)) ) { + my %p = (value => $_); + if(defined $_ && defined $self->default() && $_ eq $self->default()){ + $p{selected} = 'True'; + } + $xml_writer->startTag( 'option', %p); + $xml_writer->characters( $options{$_} ); + $xml_writer->endTag('option'); + } + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + +} + +sub validate_individual { + my ($self, $val) = @_; + my %options = %{ $self->options() }; + if($options{$val}){ + return 1; + }{ + push(@{$self->errors()}, sprintf( "Unknown value [%s] supplied to a option %s", $val,$self->name())); + return 0; + } +} + + +sub getopt_format { + return '=s'; +} + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Option + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Option/Generic.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Option/Generic.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,42 @@ +package CPT::Parameter::Option::Generic; +use Moose; +with 'CPT::Parameter::Option'; + + +sub getopt_format { + return '=s'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Option::Generic + +=head1 VERSION + +version 1.99.4 + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/Option/Genomic_Tag.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/Option/Genomic_Tag.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,48 @@ +package CPT::Parameter::Option::Genomic_Tag; +use Moose; +with 'CPT::Parameter::Option'; + +my @validKeys = ( "-10_signal", "-35_signal", "3'UTR", "5'UTR", "CAAT_signal", "CDS", "C_region", "D-loop", "D_segment", "GC_signal", "J_segment", "LTR", "N_region", "RBS", "STS", "S_region", "TATA_signal", "V_region", "V_segment", "assembly_gap", "attenuator", "enhancer", "exon", "gap", "gene", "iDNA", "intron", "mRNA", "mat_peptide", "misc_RNA", "misc_binding", "misc_difference", "misc_feature", "misc_recomb", "misc_signal", "misc_structure", "mobile_element", "modified_base", "ncRNA", "old_sequence", "operon", "oriT", "polyA_signal", "polyA_site", "precursor_RNA", "prim_transcript", "primer_bind", "promoter", "protein_bind", "rRNA", "rep_origin", "repeat_region", "sig_peptide", "source", "stem_loop", "tRNA", "terminator", "tmRNA", "transit_peptide", "unsure", "variation", "whole", "all" ); +my %validKeySet = map { $_ => $_ } @validKeys; + +has 'options' => ( is => 'rw', isa => 'HashRef', default => sub { \%validKeySet } ); + + +sub getopt_format { + return '=s'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::Option::Genomic_Tag + +=head1 VERSION + +version 1.99.4 + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Parameter/String.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Parameter/String.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,78 @@ +package CPT::Parameter::String; +use Moose; +with 'CPT::Parameter'; + + +sub galaxy_input { + my ( $self, $xml_writer ) = @_; + $self->handle_possible_galaxy_input_repeat_start($xml_writer); + my %params = $self->get_default_input_parameters('text'); + $xml_writer->startTag( + 'param', + %params, + ); + $xml_writer->endTag('param'); + $self->handle_possible_galaxy_input_repeat_end($xml_writer); +} + + +sub galaxy_output { + +} + + +sub validate_individual { + my ($self) = @_; + return 1; +} + + +sub getopt_format { + return '=s'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Parameter::String + +=head1 VERSION + +version 1.99.4 + +=head2 galaxy_input + + $file_param->galaxy_input($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 galaxy_output + + $file_param->galaxy_output($xml_writer); # where $file_param is a CPT::Parameter::* + +Utilises the $xml_writer to add a block in the section + +=head2 getopt_format + +Returns the format character for a given CPT::Parameter::* type + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/ParameterCollection.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/ParameterCollection.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,364 @@ +package CPT::ParameterCollection; +use Carp; +use Moose; +use strict; +use warnings; +use autodie; +use Data::Dumper; + +# A collection of parameters + +has 'params' => ( is => 'rw', isa => 'ArrayRef', default => sub{[]}); + + +sub validate { + my ( $self, $getopt_obj) = @_; + my $issue_count = 0; + for my $item ( @{ $self->params() } ) { + my $type = ref($item); + # We now check that getopt has supplied a value (we don't want to validate values that were NOT supplied. That'd be dumb) + # If it's defined AND it doesn't validate, then we add an error on the stack for that. + if(defined $item->name() && defined $getopt_obj->{$item->name()} && !$item->validate()){ + carp join("\n", @{$item->errors()}); + $issue_count++; + } + } + return $issue_count == 0; +} + + + +sub push_group { + my ( $self, $group ) = @_; + $self->push_params($group->flattenOptionsArray()); +} + + +sub push_param { + my ( $self, $param ) = @_; + $self->_push($self->_coerce_param($_)); +} + + +sub push_params { + my ( $self, $array_ref ) = @_; + foreach(@{$array_ref}){ + my $result = $self->_coerce_param($_); + if($result){ + $self->_push($result); + } + } +} + +sub _push{ + my ( $self, $array_ref ) = @_; + my @arr; + if($self->params()){ + @arr = @{$self->params()}; + } + push(@arr, $array_ref); + $self->params(\@arr); +} + + +sub parse_short_name { + my ( $self, $parameter ) = @_; + if ( index( $parameter, '|' ) > -1 ) { + return substr( $parameter, index( $parameter, '|' ) + 1 ); + } + else { + return ""; + } +} + + +sub parse_long_name { + my ( $self, $parameter ) = @_; + if ( index( $parameter, '|' ) > -1 ) { + return substr( $parameter, 0, index( $parameter, '|' ) ); + } + else { + return $parameter; + } +} + + +sub _coerce0 { + my ($self) = @_; + require CPT::Parameter::Empty; + my $p = CPT::Parameter::Empty->new(); + return $p; +} +sub _coerce1 { + my ($self, @parts) = @_; + require CPT::Parameter::Label; + my $p = CPT::Parameter::Label->new(label=> $parts[0]); + return $p; +} +sub _coerce2 { + my ($self, @parts) = @_; + require CPT::Parameter::Flag; + my $p = CPT::Parameter::Flag->new( + name => $self->parse_long_name( $parts[0] ), + short => $self->parse_short_name( $parts[0] ), + multiple => 0, + description => $parts[1], + ); + return $p; +} +sub _coerce3 { + my ($self, @parts) = @_; + # Three parameter case + my %attr = ( + name => $self->parse_long_name( $parts[0] ), + short => $self->parse_short_name( $parts[0] ), + multiple => 0, + description => $parts[1], + ); + + # create the attr + my %set_attr = %{ $parts[2] }; + + # Check if various things are set, if so, copy them. + foreach (qw(default options required hidden implies multiple _show_in_galaxy _galaxy_specific data_format default_format file_format)) { + if ( defined $set_attr{$_} ) { + $attr{$_} = $set_attr{$_}; + } + } + + # Now, if validate is set, we can choose a type and possibly do other coersion. + if ( $set_attr{'validate'} ) { + my $validate = $set_attr{'validate'}; + my $p; + if ( $validate eq 'Flag' ) { + require CPT::Parameter::Flag; + $p = CPT::Parameter::Flag->new(%attr); + } + elsif ( $validate eq 'Float' ) { + foreach (qw(min max)) { + if ( $set_attr{$_} ) { + $attr{$_} = $set_attr{$_}; + } + } + require CPT::Parameter::Float; + $p = CPT::Parameter::Float->new(%attr); + } + elsif ( $validate eq 'Int' ) { + foreach (qw(min max)) { + if ( $set_attr{$_} ) { + $attr{$_} = $set_attr{$_}; + } + } + require CPT::Parameter::Int; + $p = CPT::Parameter::Int->new(%attr); + } + elsif ( $validate eq 'Option' ) { + foreach (qw(options)) { + if ( $set_attr{$_} ) { + $attr{$_} = $set_attr{$_}; + } + } + require CPT::Parameter::Option::Generic; + $p = CPT::Parameter::Option::Generic->new(%attr); + } + elsif ( $validate eq 'String' ) { + require CPT::Parameter::String; + $p = CPT::Parameter::String->new(%attr); + } + elsif ( $validate eq 'File/Input' ) { + require CPT::Parameter::File::Input; + $p = CPT::Parameter::File::Input->new(%attr); + } + elsif ( $validate eq 'File/Output' ) { + require CPT::Parameter::File::Output; + $p = CPT::Parameter::File::Output->new(%attr); + } + elsif ( $validate eq 'File/OutputFormat' ) { + require CPT::Parameter::File::OutputFormat; + $p = CPT::Parameter::File::OutputFormat->new(%attr); + } + elsif ( $validate eq 'Genomic/Tag' ) { + require CPT::Parameter::Option::Genomic_Tag; + $p = CPT::Parameter::Option::Genomic_Tag->new(%attr); + } + else { + die 'Unknown validation type: ' . $validate; + } + return $p; + } + else { + require CPT::Parameter::Flag; + my $p = CPT::Parameter::Flag->new(%attr); + return $p; + } +} + +sub _coerce_param { + my ( $self, $param ) = @_; + if ( ref($param) eq 'ARRAY' ) { + my @parts = @{$param}; + if ( scalar @parts == 0 ) { + return $self->_coerce0(@parts); + } + elsif ( scalar @parts == 1 ) { + return $self->_coerce1(@parts); + } + elsif ( scalar @parts == 2 ) { + return $self->_coerce2(@parts); + } + else { + return $self->_coerce3(@parts); + } + } + else { + die 'A non-array type was attempted to be coerced...'; + } +} + + +sub get_by_name { + my ( $self, $name ) = @_; + for my $item ( @{ $self->params() } ) { + if ( defined $item->name() && $item->name() eq $name ) { + return $item; + } + } + return; +} + + +sub getopt { + my ($self) = @_; + my @clean_opt_spec; + + # Loop through each item + for my $item ( @{ $self->params() } ) { + my $type = ref($item); + + # If it's an array, that means it's definitely an old style + if ( $type eq 'ARRAY' ) { + + # And we can push it through without any issues + push( @clean_opt_spec, $item ); + } + + # If it's a hash, it's probably one of the { one_of/xor/etc } + elsif ( $type eq 'CPT::ParameterGroup' ) { + + # D: + push( @clean_opt_spec, $item->flattenOptionsArray() ); + } + + # Otherwise it's one of our CPT::Parameter stuff + else { + + # Otherwise, we'll use the method to transform our complex object into a GetOpt compatible item + push( @clean_opt_spec, $item->getOptionsArray() ); + } + } + return @clean_opt_spec; +} + + +sub populate_from_getopt { + my ( $self, $opt ) = @_; + # Loop through each item + for my $item ( @{ $self->params() } ) { + # If it's has a name, and options supplies a value for that name + if ( defined($item->name()) && defined ($opt->{$item->name()})){ + $item->value($opt->{ $item->name() }); + } + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::ParameterCollection + +=head1 VERSION + +version 1.99.4 + +=head2 validate + + $pC->validate(); + +calls the validate method, which loops through and checks that user values line +up with the validate method in each and every slot. + +=head2 push_group + + $pC->push_group(CPT::Parameter::Flag->new( )); + +Push a new groupeter onto the array + +=head2 push_param + + $pC->push_param(CPT::Parameter::Flag->new( )); + +Push a new parameter onto the array + +=head2 push_params + + $pC->push_param([ + + ]); + +Pushes a lot of params at once onto the array + +=head2 parse_short_name + + $pc->parse_short_name("file|f"); + # would return "f" + +=head2 parse_long_name + + $pc->parse_long_name("file|f"); + # would return "file" + +=head2 _coerce_param + + $pc->_coerce_param(["file|f","input file",{validate=>'File/Input'}]); + +would return a CPT::Parameter::File::Input object. + +=head2 get_by_name + + $pC->get_by_name('format'); + +returns the CPT::Parameter object with that key. + +=head2 getopt + + my @getopt_compatible_array = $pC->getopt() + +Returns a getopt compatible array by looping through the array and simply returning array objects, and calling the getOptionsArray method on CPT::Parameter::* objects + +=head2 populate_from_getopt + + $parameterCollection->populate_from_getopt($opt); + +Populate the ->value() from getopt. + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/ParameterGroup.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/ParameterGroup.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,161 @@ +package CPT::ParameterGroup; +use Moose; +use strict; +use warnings; +use autodie; + +# A special type of a ParameterCollection (could probably be a child...ohwell) +use Moose::Util::TypeConstraints; +#subtype 'TypeStr', as 'Str', where { $_ eq 'xor' || $_ eq 'or' || $_ eq 'and' }; +# Replaced with the enum + +has 'name' => ( is => 'rw', isa => 'Str'); +has 'description' => ( is => 'rw', isa => 'Str'); +has 'validator' => ( is => 'rw', isa => enum([qw(xor or and)])); +has 'options' => ( is => 'rw', isa => 'ArrayRef[HashRef]' ); + + +sub validate { + die 'Unimplemented'; +} + + + +sub set_data { + my ($self, $hash_ref) = @_; + my %d = %{$hash_ref}; + $self->name($d{name}); + $self->description($d{description}); + $self->validator($d{validator}); + $self->options($d{options}); +} + + + +sub getopt { + my ($self) = @_; + die 'unimplemented'; +} + +sub flattenOptionsArray{ + my ($self) = @_; + my @opts; + push(@opts, [sprintf("Option Group: %s\n%s\n[%s]", $self->name(), $self->description(), $self->_formatted_choice_str)]); + require CPT::ParameterCollection; + my $pC = CPT::ParameterCollection->new(); + foreach(@{$self->options()}){ + my %z = %{$_}; + my $group_name = $z{group}; + my @group_opts = @{$z{options}}; + push(@opts, [sprintf("Subgroup: %s", $group_name)]); + foreach(@group_opts){ + my $p = $pC->_coerce_param($_); + push(@opts, $p->getOptionsArray()); + } + } + return \@opts; +} + +sub _formatted_choice_str{ + my ($self) = @_; + if($self->validator() eq 'xor'){ + return 'Please only use options from ONE of the groups below, and no more'; + }elsif($self->validator() eq 'or'){ + return 'Please only use options from at least ONE of the groups below'; + }elsif($self->validator() eq 'and'){ + return 'Please ensure values/defaults are specified for ALL of the options in the groups below'; + } + return undef; +} + + +sub populate_from_getopt { + my ( $self, $opt ) = @_; + die 'unimplemented'; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::ParameterGroup + +=head1 VERSION + +version 1.99.4 + +=head2 validate + + $pC->validate(); + +calls the validate method, which loops through and checks that user values line up with the validate method in each and every slot. + +Currently unimplemented! + +=head2 getopt + + my @getopt_compatible_array = $pC->getopt() + +Returns a getopt compatible array by looping through the array and simply returning array objects, and calling the getOptionsArray method on CPT::Parameter::* objects + +=head2 populate_from_getopt + + $parameterCollection->populate_from_getopt($opt); + +Populate the ->value() from getopt. This is the special sauce of this portion of the module. +Our test case for this function is the connector choice problem. + +{ + name => 'Data Source #1', + description => "FASTA data source for our script", + type => 'xor', # must select only from one subgroup + options => [ + { + group => 'Chado Custom', + options => [ + [ 'host' => 'Hostname', { required => 1, validate => 'Str' } ], + [ 'user' => 'Username', { required => 1, validate => 'Str' } ], + [ 'pass' => 'Password', { required => 1, validate => 'Str' } ], + [ 'name' => 'Database name', { required => 1, validate => 'Str' } ], + [ 'organism' => 'organism name', { required => 1, validate => 'Str' } ], + [ 'landmark' => 'landmark name', { required => 1, validate => 'Str' } ], + ] + }, + { + group => 'Chado GMOD pre-defined connector', + options => [ + [ 'conn=s' => 'Connection Nickname', { required => 1, validate => 'Str' } ], + ] + }, + { + group => 'File', + options => [ + [ 'file|f=' => 'Input file', { required => 1, validate => 'File/Input' } ], + ] + }, + + ] + }, + +This should intelligently set parameters in $opt based on the passed data. The real question is how to handle password.... + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Plot/ArtemisColours.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Plot/ArtemisColours.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,102 @@ +package CPT::Plot::ArtemisColours; +use Moose; +use strict; +use warnings; +use Carp; + +has format => ( + is => 'rw', + isa => 'Str', + default => sub { + 'svg/rgb' + }, +); + +my %artemis_colours = ( + 0 => [ 255, 255, 255 ], + 1 => [ 100, 100, 100 ], + 2 => [ 255, 0, 0 ], + 3 => [ 0, 255, 0 ], + 4 => [ 0, 0, 255 ], + 5 => [ 0, 255, 255 ], + 6 => [ 255, 0, 255 ], + 7 => [ 255, 255, 0 ], + 8 => [ 152, 251, 152 ], + 9 => [ 135, 206, 250 ], + 10 => [ 255, 165, 0 ], + 11 => [ 200, 150, 100 ], + 12 => [ 255, 200, 200 ], + 13 => [ 170, 170, 170 ], + 14 => [ 0, 0, 0 ], + 15 => [ 255, 63, 63 ], + 16 => [ 255, 127, 127 ], + 17 => [ 255, 191, 191 ], +); + +sub getAvailableFormats { + my ($self) = @_; + return [ 'svg/rgb', 'artemis' ]; +} + +sub getColour { + my ( $self, $string ) = @_; + if ($string) { + my @rgb; + if ( $string =~ qr/^\s*(\d+)\s*$/ ) { + @rgb = @{ $artemis_colours{$1} }; + } + elsif ( $string =~ qr/^\s*(\d+)\s+(\d+)\s+(\d+)\s*$/ ) { + @rgb = ( $1, $2, $3 ); + } + else { + confess "Bad Colour Specfication [$string]"; + return; + } + + # return $colour_result; + my $format = $self->format(); + if ( $format eq 'svg/rgb' ) { + return 'rgb(' . join( ',', @rgb ) . ')'; + } + elsif ( $format eq 'artemis' ) { + return join( ' ', @rgb ); + } + else { + carp "Bad format specified, or format not added to spec list [$format]"; + } + } + else { + return; + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Plot::ArtemisColours + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Plot/Base.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Plot/Base.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,983 @@ +package CPT::Plot::Base; +use Data::Dumper; +use CPT::Plot::Label; +use CPT::Plot::Class; +use CPT::Plot::Gene; +use CPT::Plot::Colours; +use Bio::SeqIO; +use SVG; +use Moose; + +# ABSTRACT: Main plotting class for genome mapper + +has 'svg' => ( is => 'rw', isa => 'Any' ); +has 'line_count' => ( is => 'rw', isa => 'Num', default => 1 ); +has '_ft_count' => ( is => 'rw', isa => 'Num', default => 0 ); +has 'classes' => ( is => 'rw', isa => 'HashRef' ); + +# Labels +has 'label' => ( is => 'rw', isa => 'Bool' ); + +has 'label_pos' => ( is => 'rw', isa => 'Any' ); +has 'label_shrink_mode' => ( is => 'rw', isa => 'Any' ); +has 'label_callouts' => ( is => 'rw', isa => 'Any' ); +has 'label_from' => ( is => 'rw', isa => 'Any' ); +has 'label_text_source' => ( is => 'rw', isa => 'Any' ); +has 'label_numeric_features' => ( is => 'rw', isa => 'Any' ); +has 'label_query' => ( is => 'rw', isa => 'Any' ); +has 'label_numbering_count' => ( is => 'rw', isa => 'Any', default => 1 ); + +has 'justified' => ( is => 'rw', isa => 'Str' ); + +# CHanged to any b/c unpassed = undef +has 'separate_strands' => ( is => 'rw', isa => 'Any' ); +has 'double_line_for_overlap' => ( is => 'rw', isa => 'Any' ); +has 'opacity' => ( is => 'rw', isa => 'Str' ); +has 'view' => ( is => 'rw', isa => 'Str' ); + +has 'color_scheme' => ( is => 'rw', isa => 'HashRef' ); +has 'wanted_tags' => ( is => 'rw', isa => 'HashRef' ); +has 'genome_length' => ( is => 'rw', isa => 'Int' ); +has 'features' => ( is => 'rw', isa => 'ArrayRef' ); +has 'start' => ( is => 'rw', isa => 'Int' ); +has 'end' => ( is => 'rw', isa => 'Int' ); + +has 'avgRowLength' => ( is => 'rw', isa => 'Int' ); +has 'calc_height' => ( is => 'rw', isa => 'Int' ); +has 'calc_width' => ( is => 'rw', isa => 'Int' ); +has 'x_offset' => ( is => 'rw', isa => 'Num' ); +has 'y_offset' => ( is => 'rw', isa => 'Num' ); +has 'ils' => ( is => 'rw', isa => 'Num' ); +has 'width_mode' => ( is => 'rw', isa => 'Str' ); +has 'width_value' => ( is => 'rw', isa => 'Num' ); +has 'rows' => ( is => 'rw', isa => 'Num' ); +has 'split_factor' => ( is => 'rw', isa => 'Num' ); + +has 'rowdata' => ( is => 'rw', isa => 'HashRef' ); +has '_internal_maxrowlength' => ( is => 'rw', isa => 'Num' ); + +my $color_spec = CPT::Plot::Colours->new( 'default' => '#000000' ); +our ( $parser, $tree, $cb ); + +sub init { + my ($self) = @_; + my %classes; + my %cs = %{ $self->color_scheme() }; + foreach my $key ( keys %cs ) { + $classes{$key} = CPT::Plot::Class->new( + 'key' => $key, + 'color' => $cs{$key}{color}, + 'border' => $cs{$key}{border}, + 'plot' => $cs{$key}{plot}, + 'included' => 1, + ); + } + $self->classes( \%classes ); + $self->init_label_stuff(); + $self->filterFeatues(); +} + +sub init_label_stuff { + my ($self) = @_; + + if ( $self->{'label_from'} eq 'custom' ) { + use Parse::BooleanLogic; + $parser = new Parse::BooleanLogic( operators => [ '', 'OR' ] ); + $tree = $parser->as_array( $self->label_query ); + print $parser; + + #foreach bio feature, + #if solve == 1, then add to our return, + #else doesn't match + #endforeach + #my $new_tree = $parser->solve($tree,$filter); + $cb = sub { + my $query = $_[0]->{'operand'}; + my $feature = $_[1]; + + my $negate = 0; + if ( substr( $query, 0, 1 ) eq '!' ) { #negate + $negate = 1; + $query = substr( $query, 1 ); + } + if ( $query =~ m/([^:]+):["']{0,1}([^'"]*)["']{0,1}/ ) { + my ( $k, $v ) = ( $1, $2 ); + my $result; + if ( $k eq 'contains' ) { + my $values = join( + "\t", + map { + if ( $_ ne + "translation" ) + { + join( + '', + $feature + ->get_tag_values + ( + $_ + ) + ); + } + } $feature->get_all_tags() + ); + if ( $values =~ m/$v/i ) { + $result = 1; + } + else { + $result = 0; + } + } + elsif ( $k eq 'key' ) { + if ( $v =~ m/,/ ) { + $result = 0; + foreach ( split( /,/, $v ) ) { + if ( $feature + ->primary_tag + eq $_ ) + { + $result = 1; + } + } + } + else { + $result = + $feature->primary_tag eq $v; + } + } + elsif ( $k eq 'tag' ) { + if ( $v =~ m/([^=]+)=(.*)/ ) { + my ( $tag_name, $tag_match ) = + ( $1, $2 ); + if ( $feature->has_tag($1) ) { + if ( + join( + '', + $feature + ->get_tag_values + ( + $1 + ) + ) =~ /$2/i + ) + { + $result = 1; + } + else { + $result = 0; + } + } + else { + $result = 0; + } + } + else { + $result = $feature->has_tag($v); + } + } + else { + + #error + $result = 0; + } + return ( $negate xor $result ); + } + else { + + #error + return 0; + } + + #error + return 0; + }; + } +} + +sub filterFeatues { + my ($self) = @_; + + #$self->{'wanted_tags'} = map { $_ => 1 } split(/,/,$self->{'q'}); + my %tags = map { $_ => 1 } split( /,/, "tRNA,CDS" ); + $self->wanted_tags( \%tags ); + my @feats = @{ $self->features() }; + for my $feat_object (@feats) { + my $should_add = 1; + if ( $feat_object->primary_tag eq 'source' ) { + $should_add = 0; + } + if ( $feat_object->primary_tag eq 'gene' ) { + $should_add = 0; + } + if ( defined $self->start() + && $feat_object->start < $self->start() ) + { + $should_add = 0; + } + if ( defined $self->end() + && $feat_object->end > $self->end() ) + { + $should_add = 0; + } + if ($should_add) { + $self->addGene($feat_object); + } + } +} + +sub addGene { + my ( $self, $feat_object ) = @_; + my $tag = $feat_object->primary_tag; + my $label = ""; + if ( $self->label() ) { + +#If it meets the criteria specified for labelling an object, set the label, else don't set a label + if ( $self->label_from() eq 'custom' ) { + if ( $parser->solve( $tree, $cb, $feat_object ) ) { + if ( + $feat_object->has_tag( + $self->label_text_source() + ) + ) + { + $label = join( + ' ', + $feat_object->get_tag_values( + $self + ->label_text_source( + ) + ) + ); + } + else { + $label = '[]'; + } + } + +#if($feat_object->has_tag($self->label_text_source())){ +#$label = ' '.join(' ', $feat_object->get_tag_values($self->label_text_source())); +#} + } + elsif ( $self->label_from() eq 'numeric' ) { + if ( ${ $self->wanted_tags() }{$tag} ) { + $label = $self->label_numbering_count(); + $self->label_numbering_count( + $self->label_numbering_count() + 1 ); + } + } + else { + die $self->label_from(); + } + } + my @color_arr; + my $color; + if ( $feat_object->has_tag('color') ) { + push( @color_arr, $feat_object->get_tag_values('color') ); + } + if ( $feat_object->has_tag('color') ) { + push( @color_arr, $feat_object->get_tag_values('color') ); + } + if ( scalar @color_arr ) { + $color = $color_arr[0]; + } + + my $gene = CPT::Plot::Gene->new( + 'tag' => $tag, + 'label' => $label, + 'start' => $feat_object->start, + 'end' => $feat_object->end, + 'strand' => $feat_object->strand, + 'color' => $color, + ); + +#This is a "failsafe" addition of classes, in case the user didn't specify a color + if ( !defined ${ $self->classes() }{$tag} ) { + ${ $self->classes() }{$tag} = CPT::Plot::Class->new( + 'key' => $tag, + 'color' => '#000000', + 'border' => 1, + 'plot' => 1, + 'included' => 1, + ); + } + else { + ${ $self->classes() }{$tag}->addObject($gene); + } +} + +sub partitionLines { + my ($self) = @_; + +# To use when I finally get partitioning.pm working +#sub partitionLines{ +# my ($self) = @_; +# +# my $partioner = Partitioning->new( +# genome_length => $self->genome_length(), +# rows => $self->rows(), +# justified => $self->justified(), +# ); +# +# # Add data to it +# foreach(keys %classes){ +# if($classes{$_}->isIncludedInPartioning()){ +# $partioner->add($classes{$_}->getItemList()); +# } +# } +# # Run && get Results +# my %result = %{$partioner->run()}; +# # . . . +# print Dupmer %results; +# # Profit +# exit 1; +# # This is supposed to merge two hashes. [http://perldoc.perl.org/perlfaq4.html#How-do-I-merge-two-hashes%3f] +# @self{keys %result} = values %result; + + my @items; + + $self->avgRowLength( + int( + $self->genome_length() / + $self->rows() * + $self->split_factor() + ) + ) + ; #TODO, allow adjusting or just re-calc? need to benchmark first I guess. + $self->calc_height( int( ( 1 + $self->rows() ) * $self->ils() ) ); + + if ( $self->width_mode() eq 'dynamic' ) { + $self->calc_width( + int( $self->avgRowLength() / $self->width_value() ) ); + } + else { + $self->calc_width( $self->width_value() ); + } + + my $fake_count = 100; + if ($fake_count) { + for ( my $i = 0 ; $i <= $fake_count ; $i++ ) { + my $key = + int( $self->genome_length() * $i / $fake_count ); + push( @items, [ $key, $key, 1 ] ); + } + } + + my %classes = %{ $self->classes() }; + foreach ( keys %classes ) { + if ( $classes{$_}->included() ) { + push( @items, @{ $classes{$_}->getItemList() } ); + } + } + + #Sort based on where each item starts + @items = sort { ${$a}[0] <=> ${$b}[0] } @items; + + #my $z = '(' . join('),(',map { "${$_}[0],${$_}[1]" } @items ) . ')'; + #print join("\n",split(/(.{1,120})/msxog, $z)) . "\n"; + my %rowdata; + + my ( $longest_last_object, $thisRowEnd, $currentRow ) = + ( 1, 1 + $self->avgRowLength(), 1 ); + $rowdata{1}{start} = 1; + foreach my $item_ref (@items) { + my ( $item_start, $item_end ) = @{$item_ref}; + + #print "\t$item_start\t$item_end\t$thisRowEnd\n"; + if ( $item_start >= $thisRowEnd || $item_end > $thisRowEnd ) { + + # This was just cleaned up from the following commented out piece of code + if ( $self->justified() eq 'justify' + || $item_start >= $rowdata{$currentRow}{end} ) + { + $rowdata{$currentRow}{end} = $thisRowEnd; + } + else { + $rowdata{$currentRow}{end} = + max( $longest_last_object, $item_start ); + } + + # There was a corner case here: + # O represents the end of a gene, + # --- represents a gene + # | represents $thisRowEnd + # + # + # ------O | O--------- + # In this case, the second end would be chosen as + # max($longest_last_object,$item_start), which is NOT what we + # want. You want | to be chosen, not O, so in the case that + # item_start is >= current row end (or should that be >?), we + # use this. + # + # ------O | + # O--+-------- + # This case works fine + # + # + # ------O | + # O--------+-------- + # This case also works fine + # + # + # if($self->justified()){ + # $rowdata{$currentRow}end() = $thisRowEnd; + # }else{ + # if($item_start <= $rowdata{$currentRow}end()){ + # $rowdata{$currentRow}end() = max($longest_last_object,$item_start); + # }else{ + # $rowdata{$currentRow}end() = $thisRowEnd; + # } + # } + $self->_internal_maxrowlength( + max( + $self->_internal_maxrowlength(), + $rowdata{$currentRow}{end} - + $rowdata{$currentRow}{start} + ) + ); + $currentRow++; + + #print "$item_start $rowdata{$currentRow-1}{end}\n"; + if ( $item_start <= $rowdata{ $currentRow - 1 }{end} ) { + $rowdata{$currentRow}{start} = $item_start; + } + else { #nonjustified never encounters the following line + $rowdata{$currentRow}{start} = + $rowdata{ $currentRow - 1 }{end} + 1; + } + $thisRowEnd = + $self->avgRowLength() + $rowdata{$currentRow}{start}; + } + } + +# if($self->justified()){ +# foreach my $item_ref(@items){ +# my ($item_start, $item_end) = @{$item_ref}; +# # If the item starts OR ends after this row is supposed to end +# # print "\t$item_start\t$item_end\t$thisrowend\n"; +# if($item_start >= $thisRowEnd || $item_end > $thisRowEnd){ +# $rowdata{$currentRow}end() = $thisRowEnd; +# #Internal max row length is the length of the longest row +# $self->_internal_maxrowlength'} = max($self->{'_internal_maxrowlength'},$rowdata{$currentRow}{'end'}-$rowdata{$currentRow}{'start()); +# #Update which row we're on (so we aren't using +1s everywhere) +# $currentRow++; +# if($item_start <= $rowdata{$currentRow-1}end()){ +# $rowdata{$currentRow}start() = $item_start; +# }else{ +# $rowdata{$currentRow}start'} = $rowdata{$currentRow-1}{'end() + 1; +# } +# #tracks where the current row ends +# #print Dumper $rowdata; +# #print ">>$thisRowEnd\t".$self->avgRowLength'}." + ".$rowdata{$currentRow}{'start()."\n"; +# $thisRowEnd = $self->avgRowLength'} + $rowdata{$currentRow}{'start(); +# #print ">>$thisRowEnd\t".$self->avgRowLength'}." + ".$rowdata{$currentRow}{'start()."\n"; +# } +# } +# }else{#Non justified, raggedright +# foreach my $item_ref(@items){ +# my ($item_start, $item_end) = @{$item_ref}; +# #print "\t$item_start\t$item_end\t$thisrowend\n"; +# if($item_start >= $thisRowEnd || $item_end > $thisRowEnd){ +## print "\t> $item_start\t$item_end\t$thisRowEnd\n"; +## print "Candidate for ending [" . ($item_start >= $thisRowEnd) ."]\t[" .($item_end >= $thisRowEnd) . "]\n"; +## # If we have ``justified'' rulers, they all need to the be the SAME length (until the last) +## print " -- $rowdata{$currentRow}end()$thisRowEnd\n"; +# $rowdata{$currentRow}end() = max($longest_last_object,$item_start); +# #Internal max row length is the length of the longest row +# $self->_internal_maxrowlength'} = max($self->{'_internal_maxrowlength'},$rowdata{$currentRow}{'end'}-$rowdata{$currentRow}{'start()); +# #Update which row we're on (so we aren't using +1s everywhere) +# $currentRow++; +# #if($item_start <= $rowdata{$currentRow-1}end()){ +# $rowdata{$currentRow}start() = $item_start; +# #} +# #tracks where the current row ends +# $thisRowEnd = $self->avgRowLength'} + $rowdata{$currentRow}{'start(); +# } +# $longest_last_object = max($longest_last_object,$item_end); +# } +# } +#make sure the final row length is set, in addition to the _int_max_rowlength + $thisRowEnd = $rowdata{$currentRow}{end} = + $self->genome_length() + 1; #Putative + $self->_internal_maxrowlength( + max( + $self->_internal_maxrowlength(), + $rowdata{$currentRow}{end} - + $rowdata{$currentRow}{start} + ) + ); + $rowdata{max} = $currentRow; + + if ( defined $self->{start} && defined $self->{end} ) { + %rowdata = ( + '1' => + { 'end' => $self->{end}, 'start' => $self->{start} }, + 'max' => 1, + ); + } + + $self->rowdata( \%rowdata ); + +} + +sub getSVG { + my ($self) = @_; + return $self->svg(); +} + +# SVG +sub createSVG { + my ($self) = @_; + my %rowdata = %{ $self->rowdata() }; + $self->calc_height( int( ( 1 + $rowdata{max} ) * $self->ils() ) ); + if ( $self->width_mode() eq 'dynamic' ) { + $self->calc_width( + int( $self->avgRowLength() / $self->width_value() ) ); + } + else { + $self->calc_width( $self->width_value() ); + } + + $self->svg( + SVG->new( + width => $self->calc_width() + 2 * $self->x_offset(), + height => $self->calc_height() + 2 * $self->y_offset(), + ) + ); + +#$self->svg()->title( id => 'documenfeatures from t-title' )->cdata("Genome Map of [$file_name]"); + + my $ui_group = $self->svg()->tag( + 'g', + id => 'group_ui', + style => { + stroke => '#000000', + fill => '#000000', + 'fill-opacity' => 1, + } + ); + + foreach ( my $i = 1 ; $i <= $rowdata{max} ; $i++ ) { + $self->_addRuler( $i, $ui_group ); + } + + my %classes = %{ $self->classes() }; + foreach my $class_key ( keys %classes ) { + + #print "Adding features from $class_key\n"; + my $class = $classes{$class_key}; + if ( !$class->plot() ) { + next; + } + my $group = $self->svg()->tag( + 'g', + id => 'group_' . $class->key(), + style => { + stroke => ( + $class->plot() + ? ( + $class->border() + ? "black" + : "none" + ) + : 'none' + ), + fill => $class->color(), + 'fill-opacity' => $self->opacity(), + } + ); + my @data = @{ $class->getObjects() }; + foreach my $gene (@data) { + my ( $start, $end ) = + ( $gene->start(), $gene->end() ); + my $row = calculateRow( $self, $start, $end ); + addFeature( + $self, + group => $group, + row => $row, + start => $start, + end => $end, + key => $gene->tag(), + strand => $gene->strand(), + label => $gene->label(), + ui_group => $ui_group, + color => $gene->color(), + ); + + } + } + +} + +sub calculateRow { + my ( $self, $start, $end ) = @_; + my %rowdata = %{ $self->rowdata() }; + for ( my $i = 1 ; $i <= $rowdata{max} ; $i++ ) { + if ( + $start > $rowdata{$i}{start} - 1 + && $start < $rowdata{$i}{end} + 1 + && $end > $rowdata{$i}{start} - 1 + && $end < $rowdata{$i}{end} + 1 + + ) + { + return $i; + } + } + +#print "$start,$end,".$self->rowdata'}{$i}{'start'}.",".$self->{'rowdata'}{$i}{'end()."<\/b>\n"; + return 1.5; +} + +sub _addRuler { + my ( $self, $row, $ui_group ) = @_; + my $y_fix = $self->ils() * ( $row - 1 ); + + # my @d = ( + # $self->calc_width(), + # $self->rowdata'}{$row}{'end(), + # $self->rowdata'}{$row}{'start(), + # ($self->rowdata'}{$row}{'end'}-$self->{'rowdata'}{$row}{'start()), + # $self->_internal_maxrowlength(), + # ); + # print join("\t",@d),"\n"; + my %rowdata = %{ $self->rowdata() }; + my $line_width = + $self->calc_width() * + ( $rowdata{$row}{end} - $rowdata{$row}{start} ) / + $self->_internal_maxrowlength(); + +#print "Adding ruler\t".$self->rowdata'}{$row}{'start'}."\t".$self->{'rowdata'}{$row}{'end'}."\t" . ($self->{'rowdata'}{$row}{'end'} - $self->{'rowdata'}{$row}{'start()) . "\n"; + + $ui_group->line( + id => 'ui_element_' . ( $self->line_count() + rand() ), + x1 => 0 + $self->x_offset(), + x2 => $line_width + $self->x_offset(), + y1 => $y_fix + $self->y_offset(), + y2 => $y_fix + $self->y_offset() + ); + + # print "Ruler is being plotted from $y_fix to $line_width\n"; + if ( $self->separate_strands() ) { + + #$ui_group->rectangle( + #id => 'ui_element_' . ( $self->line_count() + rand() ) . "_" . rand(1), + #x => 0 + $self->x_offset(), + #y => $y_fix - 2.5 + $self->y_offset(), + #width => $line_width, + #height => 5 + #); + + #$y_fix += 100; + } + + if ( $self->double_line_for_overlap() && $row > 1 ) + { #This shows any duplicated part of the scale + if ( $rowdata{ $row - 1 }{end} - $rowdata{$row}{start} >= 0 ) + { #Equal to zero indicates ONE base of overlap + $ui_group->line( + id => 'ui_element_' + . ( $self->line_count() + rand() ), + y1 => $y_fix - 5 + $self->y_offset(), + y2 => $y_fix - 5 + $self->y_offset(), + x1 => 0 + $self->x_offset(), + x2 => $self->calc_width() * ( + $rowdata{ $row - 1 }{end} - + $rowdata{$row}{start} + ) / $self->_internal_maxrowlength() + + $self->x_offset(), + +#$calc_width*($rowdata{$row-1}end'}-$rowdata{$row}{'start'})/$self->{'_internal_maxrowlength'} + $self->{'x_offset(), + ); + } + } + $ui_group->line( + id => 'ui_element_' . ( $self->line_count() + rand() ), + x1 => 0 + $self->x_offset(), + x2 => $line_width + $self->x_offset(), + y1 => $y_fix + $self->y_offset(), + y2 => $y_fix + $self->y_offset() + ); + foreach ( $rowdata{$row}{start} - 1 .. $rowdata{$row}{end} ) { + if ( $_ % 1000 == 0 && $_ % 10000 != 0 ) { + my $current_location = + $self->calc_width() * + ( $_ - $rowdata{$row}{start} ) / + $self->_internal_maxrowlength(); + $ui_group->line( + id => 'ui_element_' + . ( $self->line_count() + rand() ), + x1 => $current_location + $self->x_offset(), + x2 => $current_location + $self->x_offset(), + y1 => $y_fix + $self->y_offset(), + y2 => $y_fix + 5 + $self->y_offset(), + ); + } + if ( $_ % 10000 == 0 ) { + my $current_location = + $self->calc_width() * + ( $_ - $rowdata{$row}{start} ) / + $self->_internal_maxrowlength(); + $ui_group->line( + id => 'ui_element_' + . ( $self->line_count() + rand() ), + x1 => $current_location + $self->x_offset(), + x2 => $current_location + $self->x_offset(), + y1 => $y_fix + $self->y_offset(), + y2 => $y_fix + 10 + $self->y_offset(), + ); + $ui_group->text( + id => 'ui_text' + . ( $self->line_count() + rand() ), + x => $current_location + 10 + $self->x_offset(), + y => $y_fix + 20 + $self->y_offset(), + -cdata => ( $_ / 1000 ) . " kb", + 'fill' => '#000000', + 'fill-opacity' => 1, + 'font-family' => 'mono', + 'stroke' => 'none' + ); + } + + if ( + ( + $_ == $rowdata{$row}{start} - 1 + || $_ == $rowdata{$row}{end} + ) + && ( $_ % 10000 != 0 ) + ) + { + my $current_location = + $self->calc_width() * + ( $_ - $rowdata{$row}{start} ) / + $self->_internal_maxrowlength(); + $ui_group->line( + id => 'ui_element_' + . ( $self->line_count() + rand() ), + x1 => $current_location + $self->x_offset(), + x2 => $current_location + $self->x_offset(), + y1 => $y_fix + $self->y_offset(), + y2 => $y_fix + 10 + $self->y_offset(), + ); + $ui_group->text( + id => 'ui_text' + . ( $self->line_count() + rand() ), + x => $current_location + $self->x_offset(), + y => $y_fix + 20 + $self->y_offset(), + -cdata => sprintf( '%d kb', ( $_ / 1000 ) ), + 'fill' => '#000000', + 'fill-opacity' => 1, + 'font-family' => 'mono', + 'stroke' => 'none' + ); + } + } +} + +sub addFeature { + my ( $self, %data ) = @_; + my %rowdata = %{ $self->rowdata() }; + my $x = + $self->calc_width() * + ( $data{'start'} - $rowdata{ $data{'row'} }{'start'} ) / + $self->_internal_maxrowlength() + $self->x_offset(); + my $w = + $self->calc_width() * + ( $data{'end'} - $data{'start'} ) / + $self->_internal_maxrowlength(); + my $h = 15; + my $y = + ( $data{'row'} - 1 ) * $self->ils() + $self->y_offset() - $h / 2; + + my $id = "$x$y$w$h" . rand(); + +#print "Item(".$data{'start'}.",".$data{'end'}.",".$data{'row'}.") =\t($x,$y,$w,$h)\n"; + + if ( $self->separate_strands() ) { + $y += -$data{'strand'} * 30; + } + + if ( $self->view() eq 'alt_random' ) { # Max add = 20 + $y += 4 * ( $x % 5 ); + } + elsif ( $self->view() eq 'alt_every' ) { # Max add = 10 + # We (Sort of like a convolution?) multiply by strand This has + # the following effect; when on the top strand, we will only + # ever add a positive to the height of the item (moving it + # downward and closer to the ruler). On the bottom strand + # however, we only ever add a negative to the height of the + # item (moving it upwards towards the ruler). This allows the + # items on the top and bottom to stay balanced. + $y += + $data{'strand'} * 10 * + ( ( $self->_ft_count( $self->_ft_count() + 1 ) ) % 2 ) - + 10 * $data{'strand'}; + + # However, This is imperfect, since we add items based on class, + # not from left to right + } + elsif ( $self->view() eq 'alt_artemis' ) { # Max add = 20? + # Muwahahahaha. Sorry. Determined coefficient and constant by + # trial and error, but this matches up with the artemis view + # without an if/else based on which strand. :D + $y += + 10 * ( ( $data{'start'} - 2 * $data{'strand'} + 1 ) % 3 ) - + 10 * $data{'strand'}; + } + + my $item_color = $color_spec->getColour( $data{'color'} ); + if ($item_color) { + $data{'group'}->rectangle( + x => ($x), + y => $y, + width => $w, + height => $h, + id => $id, + fill => $color_spec->getColour( $data{'color'} ) + ); + } + else { + $data{'group'}->rectangle( + x => ($x), + y => $y, + width => $w, + height => $h, + id => $id, + ); + } + if ( $self->label() && $data{'label'} ) { + + my ( $lx, $ly ); + my @char_data = split( //, $data{label} ); + + #Exit early if we don't even want to plot. + my $is_too_small = ( scalar(@char_data) * 2 > $w ); + if ( $self->label_shrink_mode() eq 'cutoff' && $is_too_small ) { + return; + } + + #Font Scaling + my $font_scaling = 100; + if ( $self->label_shrink_mode() eq 'shrink' ) { + $font_scaling *= $w / ( 8 * scalar(@char_data) ); + } + + # Horizontal positioning + $lx = + $x + + $w / 2 + ; #Horizontally center it, but this is by the leading edge of the text + if ( scalar(@char_data) * 8 > $w + && $self->label_shrink_mode() eq 'shrink' ) + { + $lx -= + scalar(@char_data) * 4 * + $font_scaling / 100 + ; #Adjustment for scaled text. Determined by experiment + } + else { + $lx -= + scalar(@char_data) * 4 + ; #Move four pixels left for every character in the label + } + + # Vertical positioning + if ( $self->label_pos() eq "above" ) { #Label is ABOVE + if ( $self->separate_strands() + && $data{'strand'} == -1 ) + { + $ly = + $y + + $h / 2 + 10 + 30 + ; #Need to consider below strand, only one strand. + } + else { + $ly = + $y + + $h / 2 - 30 + ; #Need to consider below strand, only one strand. + } + } + else { #Label is ON + $ly = $y + $h / 2 + 5; + } + + if ( $data{'label'} !~ /^gene_[0-9]+$/ ){ + $self->plot_label( $lx, $ly, $font_scaling, $data{'label'}, + $data{'ui_group'} ); + + if ( $self->label_callouts() + && $self->label_pos() eq "above" ) + { + $data{'ui_group'}->line( + id => 'l' . "_" . rand(1), + x1 => $x + ( $w / 2 ), + x2 => $x + ( $w / 2 ), + y1 => ( + $self->separate_strands() + && $data{'strand'} eq '-1' ? $y + $h + : $y + ), + y2 => ( + $self->separate_strands() + && $data{'strand'} eq '-1' ? $ly - 12 + : $ly + ) + ); + } + } + + } +} + +sub plot_label { + my ( $self, $x, $y, $font_size, $label, $ui_group ) = @_; + if ( $font_size < 80 ) { + $font_size = 80; + } + $ui_group->text( + id => 'text' . rand(1), + x => $x, + y => $y, + -cdata => $label, + 'fill' => '#000000', + 'fill-opacity' => 1, + 'font-family' => 'mono', + 'font-size' => $font_size . '%', + 'stroke' => 'none' + ); +} + +sub max ($$) { $_[ $_[0] < $_[1] ] } +sub min ($$) { $_[ $_[0] > $_[1] ] } + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Plot::Base - Main plotting class for genome mapper + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Plot/Class.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Plot/Class.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,70 @@ +package CPT::Plot::Class; +use Moose; +use Data::Dumper; + +# ABSTRACT: Class of objects for use in a genome map +# +has 'objects' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); + +# Should this class be used in calculation of partitions +has 'included' => ( is => 'rw', isa => 'Bool' ); + +has 'key' => ( is => 'rw', isa => 'Str' ); +has 'color' => ( is => 'rw', isa => 'Str' ); +has 'border' => ( is => 'rw', isa => 'Str' ); +has 'plot' => ( is => 'rw', isa => 'Bool' ); + +sub addObject { + my ( $self, $object ) = @_; + push( @{ $self->objects() }, $object ); +} + +sub getItemList { + my ($self) = @_; + my @items; + foreach ( @{ $self->objects() } ) { + push( @items, $_->getLocations() ); + } + return \@items; +} + +sub getObjects { + my ($self) = @_; + return $self->objects(); +} + +sub getMemberCount { + my ($self) = @_; + return scalar @{ $self->objects() }; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Plot::Class - Class of objects for use in a genome map + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Plot/Colours.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Plot/Colours.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,82 @@ +package CPT::Plot::Colours; + +# ABSTRACT: Color transformation library. + +our %artemis_colours = ( + 0 => 'rgb(255,255,255)', + 1 => 'rgb(100,100,100)', + 2 => 'rgb(255,0,0)', + 3 => 'rgb(0,255,0)', + 4 => 'rgb(0,0,255)', + 5 => 'rgb(0,255,255)', + 6 => 'rgb(255,0,255)', + 7 => 'rgb(255,255,0)', + 8 => 'rgb(152,251,152)', + 9 => 'rgb(135,206,250)', + 10 => 'rgb(255,165,0)', + 11 => 'rgb(200,150,100)', + 12 => 'rgb(255,200,200)', + 13 => 'rgb(170,170,170)', + 14 => 'rgb(0,0,0)', + 15 => 'rgb(255,63,63)', + 16 => 'rgb(255,127,127)', + 17 => 'rgb(255,191,191)', +); + +sub new { + my $class = shift; + my $self = {@_}; + bless $self, $class; + return $self; +} + +sub getColour { + my ( $self, $string ) = @_; + if ($string) { + my $colour_result; + if ( $string =~ qr/^\s*(\d+)\s*$/ ) { + $colour_result = $artemis_colours{$1}; + } + elsif ( $string =~ qr/^\s*(\d+)\s+(\d+)\s+(\d+)\s*$/ ) { + $colour_result = "rgb($1,$2,$3)"; + } + else { + warn "Bad Colour Specfication"; + return undef; + } + return $colour_result; + } + else { + return undef; + } +} + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Plot::Colours - Color transformation library. + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Plot/Gene.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Plot/Gene.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,48 @@ +package CPT::Plot::Gene; +use Moose; +use strict; +use warnings; + +# ABSTRACT: Stupid representation of a gene. Does not handle joined genes +has 'start' => ( is => 'rw', isa => 'Int' ); +has 'end' => ( is => 'rw', isa => 'Int' ); +has 'tag' => ( is => 'rw', isa => 'Str' ); +has 'label' => ( is => 'rw', isa => 'Str' ); +has 'strand' => ( is => 'rw', isa => 'Str' ); +has 'color' => ( is => 'rw', isa => 'Any' ); + +sub getLocations { + my ($self) = @_; + return [ $self->start(), $self->end() ]; +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Plot::Gene - Stupid representation of a gene. Does not handle joined genes + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Plot/Label.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Plot/Label.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,35 @@ +package CPT::Plot::Base::Label; +use Moose; + +# ABSTRACT: Abstraction of a label. Obviously not implemented + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Plot::Base::Label - Abstraction of a label. Obviously not implemented + +=head1 VERSION + +version 1.96 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Report.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Report.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,68 @@ +package CPT::Report; +use Moose::Role; +use strict; +use warnings; +use autodie; + +requires 'header'; +requires 'footer'; +requires 'h1'; +requires 'h2'; +requires 'h3'; +requires 'h4'; +requires 'h5'; +requires 'h6'; +requires 'p'; +requires 'list_start'; +requires 'list_end'; +requires 'list_element'; + +has 'title' => ( is => 'rw', isa => 'Str' ); +has 'date' => ( is => 'rw', isa => 'Str'); +has 'author' => ( is => 'rw', isa => 'Str' ); +# Core content that we build up. +has 'content' => ( is => 'rw', isa => 'Str', default => ""); + +# Internal +has '_list_type' => ( is => 'rw', isa => 'Str', default => 'bullet'); + +sub a{ + my ($self, $addition) = @_; + $self->content($self->content() . $addition); +} + +sub get_content{ + my ($self) = @_; + return $self->header() . $self->content() . $self->footer(); +} + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Report + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Report/HTML.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Report/HTML.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,165 @@ +package CPT::Report::HTML; +no warnings; +use Moose; +use Carp; +with 'CPT::Report'; +use CGI; + +has cgi => ( + is => 'rw', + isa => 'Any', + default => sub { + CGI->new(); + }, + # other attributes +); + +sub header { + my ($self) = @_; + return $self->cgi()->start_html( + -style => { -src => ['http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css','http://netdna.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap-theme.min.css'] } + ); +} + +sub footer{ + my ($self) = @_; + return $self->cgi()->end_html(); +} +sub h1{ + my ($self, $addition) = @_; + $self->a($self->cgi()->h1($addition)); +} +sub h2{ + my ($self, $addition) = @_; + $self->a($self->cgi()->h2($addition)); +} +sub h3{ + my ($self, $addition) = @_; + $self->a($self->cgi()->h3($addition)); +} +sub h4{ + my ($self, $addition) = @_; + $self->a($self->cgi()->h4($addition)); +} +sub h5{ + my ($self, $addition) = @_; + $self->a($self->cgi()->h5($addition)); +} +sub h6{ + my ($self, $addition) = @_; + $self->a($self->cgi()->h6($addition)); +} +sub p{ + my ($self, $addition) = @_; + $self->a($self->cgi()->p($addition)); +} +sub b{ + my ($self, $addition) = @_; + $self->a($self->cgi()->b($addition)); +} + +sub finalize_table{ + my ($self) = @_; + my @td; + if(defined $self->_table_header() && scalar @{$self->_table_header()} > 0 ){ + push(@td, $self->cgi->th($self->_table_header())); + } + foreach(@{$self->_table_data()}){ + push(@td, $self->cgi->td($_)); + } + + $self->a($self->cgi()->table( + {-class => "table table-striped"}, + $self->cgi->Tr(\@td) + ) + ); + + # Reset for next usage + $self->_table_header([]); + $self->_table_data([]); +} + +has _table_header => ( + is => 'rw', + isa => 'ArrayRef', +); +has _table_data => ( + is => 'rw', + isa => 'ArrayRef', + default => sub { [] }, +); + + +sub table_header { + my ($self, @values) = @_; + $self->_table_header(\@values); +} + +sub table_row { + my ($self, @values) = @_; + my @current = @{$self->_table_data()}; + push(@current, \@values); + $self->_table_data(\@current); +} + +sub list_start { + my ($self, $type) = @_; + if($type ne 'number' && $type ne 'bullet'){ + carp 'Must use number or bullet as list type'; + } + if($self->_list_type() eq 'number'){ + $self->a('
    '); + }else{ + $self->a('
      '); + } + $self->_list_type($type); +} + +sub list_end { + my ($self) = @_; + if($self->_list_type() eq 'number'){ + $self->a('
'); + }else{ + $self->a(''); + } +} + +sub list_element { + my ($self, $element_text) = @_; + $element_text =~ s{&}{&}gso; + $element_text =~ s{<}{<}gso; + $element_text =~ s{>}{>}gso; + $element_text =~ s{"}{"}gso; + $self->a(sprintf('
  • %s
  • ', $element_text)); +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Report::HTML + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Report/Pandoc.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Report/Pandoc.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,99 @@ +package CPT::Report::Pandoc; +no warnings; +use Moose; +with 'CPT::Report'; +use Carp; + +sub header { + my ($self) = @_; + return sprintf "%% %s\n%% %s\n%% %s\n\n", $self->{title}, $self->{date}, $self->{author}; +} + +sub footer{ + my ($self) = @_; + return ''; +} + +sub h1{ + my ($self, $addition) = @_; + $self->a(sprintf("\n\n# %s\n\n", $addition)); +} +sub h2{ + my ($self, $addition) = @_; + $self->a(sprintf("\n\n## %s\n\n", $addition)); +} +sub h3{ + my ($self, $addition) = @_; + $self->a(sprintf("\n\n### %s\n\n", $addition)); +} +sub h4{ + my ($self, $addition) = @_; + $self->a(sprintf("\n\n#### %s\n\n", $addition)); +} +sub h5{ + my ($self, $addition) = @_; + $self->a(sprintf("\n\n##### %s\n\n", $addition)); +} +sub h6{ + my ($self, $addition) = @_; + $self->a(sprintf("\n\n##### %s\n\n", $addition)); +} + +sub p{ + my ($self, $addition) = @_; + $self->a(sprintf("\n%s\n", $addition)); +} + +sub list_start { + my ($self, $type) = @_; + if($type ne 'number' && $type ne 'bullet'){ + carp 'Must use number or bullet as list type'; + } + $self->_list_type($type); +} + +sub list_end { + my ($self) = @_; +} + +sub list_element { + my ($self, $element_text) = @_; + my $preceeding_char; + if($self->_list_type() eq 'number'){ + $preceeding_char = '#'; + }else{ + $preceeding_char = '*'; + } + $self->a(sprintf('%s %s', $preceeding_char, $element_text)); +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Report::Pandoc + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Util.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Util.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,99 @@ +package CPT::Util; +use strict; +use warnings; +use Moose; + +#ABSTRACT: CPT convenience functions + + + +sub JSONYAMLopts { + my ( $self, %data ) = @_; + my %hash; + if ( $data{'file'} ) { + my $ext = substr($data{'file'}, rindex($data{'file'}, '.') + 1); + if ( lc $ext eq 'yaml' || lc $ext eq 'yml' ) { + require YAML::XS; + %hash = %{ YAML::XS::LoadFile( $data{'file'} ) }; + } + elsif ( lc $ext eq 'json' ) { + require JSON::XS; + require File::Slurp; + my $json = File::Slurp::read_file( $data{'file'} ); + %hash = %{ JSON::XS::decode_json($json) }; + } + else { + confess "Requested JSON/YAML file lacked a recognisable suffix $ext"; + } + } + else { + confess 'Error, no options provided'; + } + return \%hash; + +} + + + +sub untaint_path { + delete @ENV{ 'IFS', 'CDPATH', 'ENV', 'BASH_ENV' }; + $ENV{'PATH'} = '/bin:/usr/bin'; + my $path = $ENV{'PATH'}; + return 1; +} + + + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Util - CPT convenience functions + +=head1 VERSION + +version 1.99.4 + +=head1 FUNCTIONAL INTERFACE + + my $libCPT = CPT::CPT->new(); + +=head2 JSONYAMLopts + + my %colour_options = %{ + $libCPT->JSONYAMLopts( + 'file'=>$options{'optionsfile'}, + 'string'=> $options{'optionsstring'} + ) + }; + +Reads from a file or from a string passed to it describing additional options in JSON or YAML. (Should I support other options?) + +For scripts that require significant numbers of input parameters where they are often re-used, it isn't sensible to require people to specify ten flags on the command line. Offering a JSON/YAML file reader simplifies their life by providing re-usable config files. + +=head2 untaint_path + + $libCPT->untaint_path(); + +Convenience function + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Util/CRC64.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Util/CRC64.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,163 @@ +package CPT::Util::CRC64; + +# This was taken from Bio::GMOD::Bulkfiles::SWISS_CRC64 + +use Moose; +use strict; +use warnings; +use autodie; + + +has 'POLY64REVh' => ( is => 'ro', isa => 'Any', default => 0xd8000000 ); +has 'CRCTableh' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] }); +has 'CRCTablel' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] }); +has 'initialized' => ( is => 'rw', isa => 'Bool', default => 0 ); +has 'size' => ( is => 'rw', isa => 'Int' ); +has 'crcl' => (is => 'rw', isa => 'Any', default => 0); +has 'crch' => (is => 'rw', isa => 'Any', default => 0); + +sub add { + my ($self, $sequence) = @_; + my $crcl = $self->crcl(); + my $crch = $self->crch(); + my $size = $self->size(); + my @CRCTableh = @{$self->CRCTableh()}; + my @CRCTablel = @{$self->CRCTablel()}; + + foreach (split //, $sequence){ + my $shr = ($crch & 0xFF) << 24; + my $temp1h = $crch >> 8; + my $temp1l = ($crcl >> 8) | $shr; + my $tableindex = ($crcl ^ (unpack "C", $_)) & 0xFF; + $crch = $temp1h ^ $CRCTableh[$tableindex]; + $crcl = $temp1l ^ $CRCTablel[$tableindex]; + $size++; + } + $self->crcl($crcl); + $self->crch($crch); + $self->size($size); +} + +sub hexsum { + my ($self) = @_; + my $crcl = $self->crcl(); + my $crch = $self->crch(); + return sprintf("%08X%08X", $crch, $crcl); +} + +sub init { + my ($self) = @_; + $self->crcl(0); + $self->crch(0); + $self->size(0); + my @h; + my @l; + my $POLY64REVh = $self->POLY64REVh(); + if(! $self->initialized() ){ + $self->initialized(1); + for (my $i=0; $i<256; $i++) { + my $partl = $i; + my $parth = 0; + for (my $j=0; $j<8; $j++) { + my $rflag = $partl & 1; + $partl >>= 1; + $partl |= (1 << 31) if $parth & 1; + $parth >>= 1; + $parth ^= $POLY64REVh if $rflag; + } + $h[$i] = $parth; + $l[$i] = $partl; + } + $self->CRCTableh(\@h); + $self->CRCTablel(\@l); + } +} + +sub crc64 { + my ($self, $sequence) = @_; + $self->init(); + $self->add($sequence); + return $self->hexsum(); +} + +no Moose; + +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Util::CRC64 + +=head1 VERSION + +version 1.99.4 + +=head1 CRC64 perl module documentation + +=head2 NAME + +CRC64 - Calculate the cyclic redundancy check. + +=head2 SYNOPSIS + + use CPT::Util::CRC64; + + my $crc = CPT::Util::CRC64->new(); + $crc = $crc->add("IHATEMATH"); + #returns the string "E3DCADD69B01ADD1" + +=head2 DESCRIPTION + +SWISS-PROT + TREMBL use a 64-bit Cyclic Redundancy Check for the +amino acid sequences. + +The algorithm to compute the CRC is described in the ISO 3309 +standard. The generator polynomial is x64 + x4 + x3 + x + 1. +Reference: W. H. Press, S. A. Teukolsky, W. T. Vetterling, and B. P. +Flannery, "Numerical recipes in C", 2nd ed., Cambridge University +Press. Pages 896ff. + +=head2 Functions + +=over + +=item crc64 string + +Calculate the CRC64 (cyclic redundancy checksum) for B. + +In array context, returns two integers equal to the higher and lower +32 bits of the CRC64. In scalar context, returns a 16-character string +containing the CRC64 in hexadecimal format. + +=back + +=head1 AUTHOR + +Alexandre Gattiker, gattiker@isb-sib.ch + +Eric Rasche (reworte for CPT framework) + +=head1 ACKNOWLEDGEMENTS + +Based on SPcrc, a C implementation by Christian Iseli, available at +ftp://ftp.ebi.ac.uk/pub/software/swissprot/Swissknife/old/SPcrc.tar.gz + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,85 @@ +package CPT::Writer; +use Moose::Role; +use strict; +use warnings; +use autodie; + +requires 'process'; +requires 'suffix'; + +# One or the other of these will be set. *ought* to be in accordance w/ galaxy_override +has 'OutputFilesClass' => ( is => 'rw' ); + +# This parameter specifies that we should behave according to galaxy_override spec. +has 'galaxy_override' => ( is => 'rw', isa => 'Bool' ); +has 'title' => ( is => 'rw', isa => 'Str' ); +has 'author' => ( is => 'rw', isa => 'Str' ); +has 'data' => ( is => 'rw' ); +has 'processed_data' => ( is => 'rw' ); +has 'processing_complete' => ( is => 'rw', isa => 'Bool' ); +# What file names were generated during the writing process +has 'used_filenames' => ( is => 'rw', isa => 'ArrayRef', default => sub { [] }); +# An optional, hinted at name. Otherwise we'll generate one. +has 'name' => ( is => 'rw', isa => 'Str' ); + +sub write { + my ($self) = @_; + if ( $self->processing_complete ) { + $self->OutputFilesClass->extension( $self->suffix() ); + my $next_output_file = $self->OutputFilesClass->get_next_file(); + # Store the name of the file we used + push(@{$self->used_filenames()}, $next_output_file); + # Write data out + open(my $outfile, '>', $next_output_file ); + print $outfile $self->processed_data; # given that processed_data is a string... + close($outfile); + } + else { + warn "Write called but processing was not marked as complete. Not writing"; + } +} + +sub get_name { + my ($self) = @_; + #return $self->OutputFilesClass->get_next_file(); + return $self->OutputFilesClass->_get_filename(); +} + +sub process_data { + my ($self) = @_; + if (!defined $self->data ) { + #confess "No data to process."; + } + $self->process(); +} + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Archive.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Archive.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,85 @@ +package CPT::Writer::Archive; +no warnings; +use Moose; +use Archive::Any::Create; +use File::Copy qw/move/; +with 'CPT::Writer'; + +has format => ( + is => 'ro', + isa => 'Str', + default => sub { + 'tar.gz', + }, +); + +sub process { + my ($self) = @_; + # Should be a Archive::Any::Create object + if(ref $self->data() ne 'Archive::Any::Create'){ + warn 'Tool author sent non Archive::Any::Create data to the writer'; + }else{ + $self->processed_data( $self->data() ); + $self->processing_complete(1); + } +} + +sub write { + my ($self) = @_; + if ( $self->processing_complete ) { + # Force the extension to that of the specified format + $self->OutputFilesClass->extension( $self->format() ); + # Get a filename + my $next_output_file = $self->OutputFilesClass->get_next_file(); + # And get another filename with extension tacked on so we KNOW it'll behave correctly. + my $next_output_file_with_extension = $self->OutputFilesClass->get_next_file() . '.' . $self->format(); + # Store the name of the file we used + push(@{$self->used_filenames()}, $next_output_file); + # Write data out + $self->processed_data->write_file($next_output_file_with_extension); + # If it has been written somewhere other than where we want, + # then we need to move it. + if($next_output_file ne $next_output_file_with_extension){ + move($next_output_file_with_extension, $next_output_file); + } + } + else { + warn +"Write called but processing was not marked as complete. Not writing"; + } + +} + +sub suffix { + return 'csv'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Archive + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/CSV.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/CSV.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,110 @@ +package CPT::Writer::CSV; +no warnings; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + my %data = %{ $self->data }; + + my @sheets = keys %data; + my %complete_processed_data = map { $_ => "" } @sheets; + foreach (@sheets) { + my $tmp_data = ''; + my $data_struc_ref = $data{$_}; + $tmp_data .= '"' + . join( '","', + map { local $_ = $_; s/"/\\"/g; $_ } + @{ ${$data_struc_ref}{'header'} } ) + . '"' . "\n"; + foreach ( @{ ${$data_struc_ref}{'data'} } ) { + $tmp_data .= '"' . join( + '","', + map { + local $_ = $_; + unless (defined $_) { $_ = "" } + s/"/\\"/g; + $_; + } @{$_} + ) . '"' . "\n"; + } + $complete_processed_data{$_} = $tmp_data; + } + $self->processed_data( \%complete_processed_data ); + $self->processing_complete(1); +} + +sub write { + + # Wanted to use child's write method here so I can output multiple files. + my ($self) = @_; + if ( $self->processing_complete ) { + my %complete_processed_data = %{ $self->processed_data() }; + my @sheets = keys %complete_processed_data; + + # When this is initially called, the OutputFilesClass was given + # a hint as to what files from this analysis should be called. + # We'll borrow that and modify it each time before putting it + # back at the end. Since this is the *ONLY* type that has + # sub-reports, it feels O.K. to do it here. + my $base_name = $self->OutputFilesClass->given_filename(); + unless ($base_name) { $base_name = ""; } + foreach (@sheets) { + # We update the base filename to include our + # particular Sheet name. As such the generate function + # should start generating files with that as part of + # the name + $self->OutputFilesClass->given_filename( $base_name . '.' . $_ ); + $self->OutputFilesClass->extension('csv'); + my $next_output_file = + $self->OutputFilesClass->get_next_file(); + + # Store the filename we used + push(@{$self->used_filenames()}, $next_output_file); + open( my $outfile, '>', $next_output_file ); + print $outfile $complete_processed_data{$_}; + close($outfile); + } + # Reset it back to default (probably unnecessary) + $self->OutputFilesClass->given_filename($base_name); + } + else { + warn +"Write called but processing was not marked as complete. Not writing"; + } + +} + +sub suffix { + return 'csv'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::CSV + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/CSV_U.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/CSV_U.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,110 @@ +package CPT::Writer::CSV_U; +no warnings; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + my %data = %{ $self->data }; + + my @sheets = keys %data; + my %complete_processed_data = map { $_ => "" } @sheets; + foreach (@sheets) { + my $tmp_data = ''; + my $data_struc_ref = $data{$_}; + $tmp_data .= + join( ',', + map { local $_ = $_; s/"/\\"/g; $_ } + @{ ${$data_struc_ref}{'header'} } ) + . "\n"; + foreach ( @{ ${$data_struc_ref}{'data'} } ) { + $tmp_data .= join( + ',', + map { + local $_ = $_; + unless (defined $_) { $_ = "" } + s/"/\\"/g; + $_; + } @{$_} + ) . "\n"; + } + $complete_processed_data{$_} = $tmp_data; + } + $self->processed_data( \%complete_processed_data ); + $self->processing_complete(1); +} + +sub write { + + # Wanted to use child's write method here so I can output multiple files. + my ($self) = @_; + if ( $self->processing_complete ) { + my %complete_processed_data = %{ $self->processed_data() }; + my @sheets = keys %complete_processed_data; + + # When this is initially called, the OutputFilesClass was given + # a hint as to what files from this analysis should be called. + # We'll borrow that and modify it each time before putting it + # back at the end. Since this is the *ONLY* type that has + # sub-reports, it feels O.K. to do it here. + my $base_name = $self->OutputFilesClass->given_filename(); + unless ($base_name) { $base_name = ""; } + foreach (@sheets) { + # We update the base filename to include our + # particular Sheet name. As such the generate function + # should start generating files with that as part of + # the name + $self->OutputFilesClass->given_filename( $base_name . '.' . $_ ); + $self->OutputFilesClass->extension('csv'); + my $next_output_file = + $self->OutputFilesClass->get_next_file(); + + # Store the filename we used + push(@{$self->used_filenames()}, $next_output_file); + open( my $outfile, '>', $next_output_file ); + print $outfile $complete_processed_data{$_}; + close($outfile); + } + # Reset it back to default (probably unnecessary) + $self->OutputFilesClass->given_filename($base_name); + } + else { + warn +"Write called but processing was not marked as complete. Not writing"; + } + +} + +sub suffix { + return 'csv'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::CSV_U + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Dummy.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Dummy.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,51 @@ +package CPT::Writer::Dummy; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); +} + +sub write { + my ($self) = @_; + # Do nothing. This object sees/hears nothing. + # Except we would like to consume a single filename + push(@{$self->used_filenames()}, $self->OutputFilesClass->get_next_file()); + return; +} + +sub suffix { + return 'txt'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Dummy + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Dumper.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Dumper.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,45 @@ +package CPT::Writer::Dumper; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + use Data::Dumper; + my $d = Data::Dumper->new( [ $self->data ] ); + $self->processed_data( $d->Dump ); + $self->processing_complete(1); +} + +sub suffix { + return 'perldump'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Dumper + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Fasta.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Fasta.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,78 @@ +package CPT::Writer::Fasta; +use Moose; +with 'CPT::Writer'; +require Bio::SeqIO; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); + return 1; +} + +sub write { + my ($self) = @_; + if ( $self->processing_complete ) { + $self->OutputFilesClass->extension( $self->suffix() ); + my $next_output_file = $self->OutputFilesClass->get_next_file(); + open( my $filehandle, '>', $next_output_file ); + # This is probably a good change but will need testing. + if ( ref( $self->processed_data ) eq 'Bio::PrimarySeqI') { + my $outseq = Bio::SeqIO->new( + -fh => $filehandle, + -format => 'Fasta', + ); + $outseq->write_seq( $self->processed_data ); + } + elsif ( ref( $self->processed_data ) eq 'ARRAY') { + my $outseq = Bio::SeqIO->new( + -fh => $filehandle, + -format => 'Fasta', + ); + foreach my $seq (@{$self->processed_data()}){ + $outseq->write_seq( $seq ); + } + } + else { + print $filehandle $self->processed_data; + } + close($filehandle); + } + else { + warn "Write called but processing was not marked as complete. Not writing"; + } +} + +sub suffix { + return 'fa'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Fasta + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/GFF3.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/GFF3.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,43 @@ +package CPT::Writer::GFF3; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); +} + +sub suffix { + return 'gff3'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::GFF3 + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Genomic.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Genomic.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,144 @@ +package CPT::Writer::Genomic; +use Moose; +with 'CPT::Writer'; + +# Specific format of genomic writer +has 'format' => ( is => 'rw', isa => 'Str', default => 'Genbank'); + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); + return 1; +} + +sub write { + my ($self) = @_; + if ( $self->processing_complete ) { + $self->OutputFilesClass->extension( $self->suffix() ); + my $next_output_file = $self->OutputFilesClass->get_next_file(); + open( my $filehandle, '>', $next_output_file ); + + require Bio::SeqIO; + my $obj_type = ref $self->processed_data(); + if(substr($obj_type,0,10) eq 'Bio::Seq::'){ + my $outseq = Bio::SeqIO->new( + -fh => $filehandle, + -format => $self->format(), + ); + $outseq->write_seq( $self->processed_data ); + }elsif(substr($obj_type,0,10) eq 'Bio::SeqIO'){ + my $outseq = Bio::SeqIO->new( + -fh => $filehandle, + -format => $self->format(), + ); + while (my $inseq = $self->processed_data()->next_seq()) { + $outseq->write_seq($inseq); + } + }elsif(substr($obj_type,0,8) eq 'Bio::Seq'){ + my $outseq = Bio::SeqIO->new( + -fh => $filehandle, + -format => $self->format(), + ); + $outseq->write_seq( $self->processed_data ); + }elsif(ref $self->processed_data eq 'ARRAY'){ + # Assume array of genomes + my $outseq = Bio::SeqIO->new( + -fh => $filehandle, + -format => $self->format(), + ); + foreach my $inseq(@{$self->processed_data}){ + $outseq->write_seq($inseq); + } + }else{ + print $filehandle $self->processed_data(); + } + close($filehandle); + } + else { + warn +"Write called but processing was not marked as complete. Not writing"; + } +} + +sub suffix { + my ($self) = @_; + my %suffix_map = ( + 'abi' => 'abi', + 'ace' => 'ace', + 'agave' => 'agave', + 'alf' => 'alf', + 'asciitree' => 'txt', + 'bsml' => 'bsml', + 'bsml_sax' => 'bsml', + 'chadoxml' => 'xml', + 'chaos' => 'chaos', + 'chaosxml' => 'xml', + 'ctf' => 'ctf', + 'embl' => 'emb', + 'entrezgene' => 'asn1', + 'excel' => 'xls', + 'exp' => 'exp', + 'fasta' => 'fa', + 'fastq' => 'fastq', + 'game' => 'xml', + 'gcg' => 'gcg', + 'genbank' => 'gbk', + 'interpro' => 'xml', + 'kegg' => 'kegg', + 'largefasta' => 'lfa', + 'lasergene' => 'lasergene', + 'locuslink' => 'll_tmpl', + 'phd' => 'phred', + 'pir' => 'pir', + 'pln' => 'pln', + 'qual' => 'phred', + 'raw' => 'txt', + 'scf' => 'scf', + 'seqxml' => 'xml', + 'strider' => 'strider', + 'swiss' => 'sp', + 'tab' => 'tsv', + 'tigr' => 'xml', + 'tigrxml' => 'xml', + 'tinyseq' => 'xml', + 'ztr' => 'ztr', + ); + + if($suffix_map{lc($self->format())}){ + return $suffix_map{lc($self->format())}; + }else{ + return 'unknown'; + } +} + +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Genomic + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/HTML.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/HTML.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,43 @@ +package CPT::Writer::HTML; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); +} + +sub suffix { + return 'html'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::HTML + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/JSON.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/JSON.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,44 @@ +package CPT::Writer::JSON; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + use JSON::XS; + $self->processed_data( encode_json $self->data ); + $self->processing_complete(1); +} + +sub suffix { + return 'json'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::JSON + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Pandoc.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Pandoc.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,43 @@ +package CPT::Writer::Pandoc; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); +} + +sub suffix { + return 'md'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Pandoc + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/SVG.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/SVG.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,43 @@ +package CPT::Writer::SVG; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data->xmlify() ); + $self->processing_complete(1); +} + +sub suffix { + return 'svg'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::SVG + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Spreadsheet.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Spreadsheet.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,36 @@ +package CPT::Writer::Spreadsheet; +use Moose::Role; +use strict; +use warnings; +use autodie; + +no Moose::Role; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Spreadsheet + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Spreadsheet/XLS.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Spreadsheet/XLS.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,71 @@ +package CPT::Writer::Spreadsheet::XLS; +use Moose; +with 'CPT::Writer', 'CPT::Writer::Spreadsheet'; +use Spreadsheet::WriteExcel; + +sub process { + my ($self) = @_; + if ( $self->galaxy_override ) { + die 'This class currently incompatible with Galaxy'; + } + $self->OutputFilesClass->extension( $self->suffix() ); + my $next_output_file = $self->OutputFilesClass->get_next_file(); + my $workbook = Spreadsheet::WriteExcel->new($next_output_file); + + my %data = %{ $self->data }; + my @sheets = keys %data; + foreach (@sheets) { + my $current_worksheet = $workbook->add_worksheet($_); + my $data_struc_ref = $data{$_}; + + #R,C,AR + $current_worksheet->write_row( 0, 0, + ${$data_struc_ref}{'header'} ); + my $row = 1; + foreach ( @{ ${$data_struc_ref}{'data'} } ) { + $current_worksheet->write_row( $row, 0, $_ ); + $row++; + } + } + $self->processed_data($workbook); + $self->processing_complete(1); +} + +sub write { + my ($self) = @_; + $self->processed_data()->close(); +} + +sub suffix { + return 'xls'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Spreadsheet::XLS + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/Spreadsheet/XLSX.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/Spreadsheet/XLSX.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,78 @@ +package CPT::Writer::Spreadsheet::XLSX; +use Moose; +with 'CPT::Writer', 'CPT::Writer::Spreadsheet'; +use Excel::Writer::XLSX; + +#http://search.cpan.org/~jmcnamara/Excel-Writer-XLSX/lib/Excel/Writer/XLSX.pm#SPEED_AND_MEMORY_USAGE +# +#The effect of this is that Excel::Writer::XLSX is about 30% slower than Spreadsheet::WriteExcel and uses 5 times more memory. +# +#This memory usage can be reduced almost completely by using the Workbook set_optimization() method: +# +# $workbook->set_optimization(); +# +sub process { + my ($self) = @_; + if ( $self->galaxy_override ) { + die 'This class currently incompatible with Galaxy'; + } + my $workbook = Excel::Writer::XLSX->new( + join( '.', $self->outfile(), $self->suffix() ) ); + $workbook->set_optimization(); + my %data = %{ $self->data }; + my @sheets = keys %data; + foreach (@sheets) { + my $current_worksheet = $workbook->add_worksheet($_); + my $data_struc_ref = $data{$_}; + + #R,C,AR + $current_worksheet->write_row( 0, 0, + ${$data_struc_ref}{'header'} ); + my $row = 1; + foreach ( @{ ${$data_struc_ref}{'data'} } ) { + $current_worksheet->write_row( $row, 0, $_ ); + $row++; + } + } + $self->processed_data($workbook); + $self->processing_complete(1); +} + +sub write { + my ($self) = @_; + $self->processed_data()->close(); +} + +sub suffix { + return 'xlsx'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::Spreadsheet::XLSX + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/TSV.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/TSV.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,110 @@ +package CPT::Writer::TSV; +no warnings; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + my %data = %{ $self->data }; + + my @sheets = keys %data; + my %complete_processed_data = map { $_ => "" } @sheets; + foreach (@sheets) { + my $tmp_data = ''; + my $data_struc_ref = $data{$_}; + $tmp_data .= '"' + . join( "\"\t\"", + map { local $_ = $_; s/"/\\"/g; $_ } + @{ ${$data_struc_ref}{'header'} } ) + . '"' . "\n"; + foreach ( @{ ${$data_struc_ref}{'data'} } ) { + $tmp_data .= '"' . join( + "\"\t\"", + map { + local $_ = $_; + unless (defined $_) { $_ = "" } + s/"/\\"/g; + $_; + } @{$_} + ) . '"' . "\n"; + } + $complete_processed_data{$_} = $tmp_data; + } + $self->processed_data( \%complete_processed_data ); + $self->processing_complete(1); +} + +sub write { + + # Wanted to use child's write method here so I can output multiple files. + my ($self) = @_; + if ( $self->processing_complete ) { + my %complete_processed_data = %{ $self->processed_data() }; + my @sheets = keys %complete_processed_data; + + # When this is initially called, the OutputFilesClass was given + # a hint as to what files from this analysis should be called. + # We'll borrow that and modify it each time before putting it + # back at the end. Since this is the *ONLY* type that has + # sub-reports, it feels O.K. to do it here. + my $base_name = $self->OutputFilesClass->given_filename(); + unless ($base_name) { $base_name = ""; } + foreach (@sheets) { + # We update the base filename to include our + # particular Sheet name. As such the generate function + # should start generating files with that as part of + # the name + $self->OutputFilesClass->given_filename( $base_name . '.' . $_ ); + $self->OutputFilesClass->extension('csv'); + my $next_output_file = + $self->OutputFilesClass->get_next_file(); + + # Store the filename we used + push(@{$self->used_filenames()}, $next_output_file); + open( my $outfile, '>', $next_output_file ); + print $outfile $complete_processed_data{$_}; + close($outfile); + } + # Reset it back to default (probably unnecessary) + $self->OutputFilesClass->given_filename($base_name); + } + else { + warn +"Write called but processing was not marked as complete. Not writing"; + } + +} + +sub suffix { + return 'csv'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::TSV + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/TSV_U.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/TSV_U.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,110 @@ +package CPT::Writer::TSV_U; +no warnings; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + my %data = %{ $self->data }; + + my @sheets = keys %data; + my %complete_processed_data = map { $_ => "" } @sheets; + foreach (@sheets) { + my $tmp_data = ''; + my $data_struc_ref = $data{$_}; + $tmp_data .= + join( "\t", + map { local $_ = $_; s/"/\\"/g; $_ } + @{ ${$data_struc_ref}{'header'} } ) + . "\n"; + foreach ( @{ ${$data_struc_ref}{'data'} } ) { + $tmp_data .= join( + "\t", + map { + local $_ = $_; + unless (defined $_) { $_ = "" } + s/"/\\"/g; + $_; + } @{$_} + ) . "\n"; + } + $complete_processed_data{$_} = $tmp_data; + } + $self->processed_data( \%complete_processed_data ); + $self->processing_complete(1); +} + +sub write { + + # Wanted to use child's write method here so I can output multiple files. + my ($self) = @_; + if ( $self->processing_complete ) { + my %complete_processed_data = %{ $self->processed_data() }; + my @sheets = keys %complete_processed_data; + + # When this is initially called, the OutputFilesClass was given + # a hint as to what files from this analysis should be called. + # We'll borrow that and modify it each time before putting it + # back at the end. Since this is the *ONLY* type that has + # sub-reports, it feels O.K. to do it here. + my $base_name = $self->OutputFilesClass->given_filename(); + unless ($base_name) { $base_name = ""; } + foreach (@sheets) { + # We update the base filename to include our + # particular Sheet name. As such the generate function + # should start generating files with that as part of + # the name + $self->OutputFilesClass->given_filename( $base_name . '.' . $_ ); + $self->OutputFilesClass->extension('csv'); + my $next_output_file = + $self->OutputFilesClass->get_next_file(); + + # Store the filename we used + push(@{$self->used_filenames()}, $next_output_file); + open( my $outfile, '>', $next_output_file ); + print $outfile $complete_processed_data{$_}; + close($outfile); + } + # Reset it back to default (probably unnecessary) + $self->OutputFilesClass->given_filename($base_name); + } + else { + warn +"Write called but processing was not marked as complete. Not writing"; + } + +} + +sub suffix { + return 'csv'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::TSV_U + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/TXT.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/TXT.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,43 @@ +package CPT::Writer::TXT; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + $self->processed_data( $self->data ); + $self->processing_complete(1); +} + +sub suffix { + return 'txt'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::TXT + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut diff -r 000000000000 -r b8b8b52904a5 cpt_psm_comparison_table/lib/CPT/Writer/YAML.pm --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cpt_psm_comparison_table/lib/CPT/Writer/YAML.pm Tue Jul 05 05:42:59 2022 +0000 @@ -0,0 +1,44 @@ +package CPT::Writer::YAML; +use Moose; +with 'CPT::Writer'; + +sub process { + my ($self) = @_; + require YAML::XS; + $self->processed_data( YAML::XS::Dump( $self->data ) ); + $self->processing_complete(1); +} + +sub suffix { + return 'yml'; +} +no Moose; +1; + +__END__ + +=pod + +=encoding UTF-8 + +=head1 NAME + +CPT::Writer::YAML + +=head1 VERSION + +version 1.99.4 + +=head1 AUTHOR + +Eric Rasche + +=head1 COPYRIGHT AND LICENSE + +This software is Copyright (c) 2014 by Eric Rasche. + +This is free software, licensed under: + + The GNU General Public License, Version 3, June 2007 + +=cut