changeset 0:587281a1acec draft

Uploaded
author dcouvin
date Fri, 17 Sep 2021 19:29:45 +0000
parents
children b6eb9111d7af
files input.fasta removeChar.pl removeChar.xml
diffstat 3 files changed, 111 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/input.fasta	Fri Sep 17 19:29:45 2021 +0000
@@ -0,0 +1,8 @@
+>sequence1
+atgcatgcatgcacgatcgatcgat--gca-tgcac
+>sequence2
+aaacatgcatgcacgatcgatcgatgtatg---cac
+>sequence3
+atgcatgcatgcactatcgatcgat-gcata--aac
+>sequence4
+atgcatgcacgcatgatcgatcga-tgca--tgcac
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/removeChar.pl	Fri Sep 17 19:29:45 2021 +0000
@@ -0,0 +1,57 @@
+#!/usr/bin/perl -w
+use strict;
+use warnings;
+
+############################################################################
+# Remove alignment columns from a multi-FASTA file: every position at which
+# at least one sequence carries the given character is dropped from all
+# sequences, and the cleaned records are printed to STDOUT.
+#
+# Usage: removeChar.pl <multi-fasta file> <character>
+# The character is matched exactly (case-sensitive), e.g. 'N' or '-'.
+############################################################################
+
+my $usage = "Usage: $0 <multi-fasta file> <character>\n";
+my ($inFile, $char) = @ARGV;
+die $usage unless defined $inFile and defined $char;
+# A longer (or empty) string could never equal a single residue.
+die "Expected exactly one character to remove, got '$char'\n$usage"
+    unless length($char) == 1;
+
+# Parse the file into parallel arrays: header lines and concatenated sequences.
+my @headers   = ();
+my @sequences = ();
+open(my $in, '<', $inFile) or die "Unable to read file $inFile: $!\n";
+while ( defined( my $line = <$in> ) ) {
+    $line =~ s/[\r\n]+\z//;    # strip LF and CRLF line endings
+    if ( $line =~ m/^>/ ) {
+        push @headers, $line;
+        push @sequences, '';    # keeps a header without residues defined
+    }
+    elsif (@headers) {
+        $sequences[-1] .= $line;
+    }
+    elsif ( $line =~ m/\S/ ) {
+        # Appending to $sequences[-1] of an empty array would be fatal.
+        die "Sequence data found before the first '>' header in $inFile\n";
+    }
+}
+close($in);
+
+# Record every zero-based position where any sequence holds the character.
+my %lookup = ();
+for my $seq (@sequences) {
+    my $pos = index( $seq, $char );
+    while ( $pos >= 0 ) {
+        $lookup{$pos} = 1;
+        $pos = index( $seq, $char, $pos + 1 );
+    }
+}
+
+# Print each record with the flagged positions removed.
+for my $i ( 0 .. $#headers ) {
+    my $seq = $sequences[$i];
+    my $out = join '', map { substr( $seq, $_, 1 ) }
+        grep { !exists $lookup{$_} } 0 .. length($seq) - 1;
+    print $headers[$i], "\n", $out, "\n";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/removeChar.xml	Fri Sep 17 19:29:45 2021 +0000
@@ -0,0 +1,46 @@
+<tool id="removechar" name="removeChar tool" version="1.0.0">
+  <description>allows to remove positions (or columns) from a multi-Fasta alignment file in function of a given character</description>
+
+<!--<requirements>
+  <requirement type="package" version="1.7.2">perl-bioperl</requirement>
+</requirements>-->
+
+<command detect_errors="aggressive"><![CDATA[
+
+#import re
+        ## Symlinks the input file under its Galaxy 'element_identifier'
+        ## so that messages from the script show a human-readable name (instead of 'dataset_xyz.dat')
+            ## Replace shell-unsafe characters in the identifier, then add single quotes around it
+            #set $_input_file = "'{}'".format(re.sub('[^\w.-]', '_', str($input.element_identifier)))
+            ln -s '${input}' ${_input_file} &&
+
+        ## Every substituted value is single-quoted so the shell passes it literally (e.g. '*' or '?' must not be glob-expanded)
+        perl '$__tool_directory__/removeChar.pl' $_input_file '$char' > '$output'
+
+
+
+]]></command>
+ <!-- perl '$__tool_directory__/nucleScore.pl' $_input_file > "$output"  -->
+ <!-- ./nuclescore.sh ${named_input_files} > "$output" -->
+
+<inputs>
+  <param format="fasta" name="input" type="data" label="Multi-FASTA file: "/>
+  <param name="char" type="text" area="false" value="N" label="Character to be removed from Multi-FASTA file:" help="Users can directly write the character to be removed without quotes (&quot;&quot; or '')" />
+</inputs>
+
+ <outputs>
+    <data format="fasta" name="output" />
+ </outputs>
+
+<help><![CDATA[
+removeChar.pl is a Perl script allowing to remove positions (or columns) of an aligned multi-Fasta file in function of a given character (eg. N).
+The resulting multi-Fasta file corresponds to the same input multiFasta alignment file without the queried character.
+
+This script belongs to the getSequenceInfo supplementary tools.
+
+- GitHub: https://github.com/karubiotools/getSequenceInfo/tree/master/supplementary_tools
+]]>
+</help>
+
+</tool>
+