Mercurial > repos > dcouvin > removechar
changeset 0:587281a1acec draft
Uploaded
author | dcouvin |
---|---|
date | Fri, 17 Sep 2021 19:29:45 +0000 |
parents | |
children | b6eb9111d7af |
files | input.fasta removeChar.pl removeChar.xml |
diffstat | 3 files changed, 111 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/input.fasta Fri Sep 17 19:29:45 2021 +0000 @@ -0,0 +1,8 @@ +>sequence1 +atgcatgcatgcacgatcgatcgat--gca-tgcac +>sequence2 +aaacatgcatgcacgatcgatcgatgtatg---cac +>sequence3 +atgcatgcatgcactatcgatcgat-gcata--aac +>sequence4 +atgcatgcacgcatgatcgatcga-tgca--tgcac
#!/usr/bin/perl -w
use strict;
use warnings;

############################################################################
# removeChar.pl
#
# Script to remove positions (columns) from a multi-FASTA alignment file
# as a function of a given character: every position at which at least one
# sequence carries that character is dropped from ALL sequences.
#
# Usage:  removeChar.pl <input.fasta> <character>
#
# The cleaned records are printed to the currently selected output handle
# (STDOUT by default) as one header line followed by one sequence line.
############################################################################

# Reads a multi-FASTA file.
#   $in_file - path of the file to read
# Returns two array references (\@headers, \@sequences) with matching
# indices; multi-line sequences are concatenated into a single string.
# Dies if the file cannot be opened or if sequence data precedes the first
# header.
sub read_fasta {
    my ($in_file) = @_;
    my @headers;
    my @sequences;

    open(my $in, '<', $in_file) or die "Unable to read file $in_file: $!\n";
    while (defined(my $line = <$in>)) {
        # Strip LF as well as CRLF line endings so that a stray "\r" never
        # ends up inside a header or a sequence.
        $line =~ s/\r?\n\z//;
        next if $line eq '';

        if ($line =~ m/^>/) {
            push @headers, $line;
            # Start with an empty string so that a header without any
            # sequence line still yields a defined (empty) sequence.
            push @sequences, '';
        }
        elsif (@sequences) {
            $sequences[-1] .= $line;
        }
        else {
            die "Sequence data found before the first FASTA header "
              . "in $in_file (line $.)\n";
        }
    }
    close($in);

    return (\@headers, \@sequences);
}

# Collects the zero-based positions at which any sequence carries $char.
#   $char      - the single character to look for (case-sensitive)
#   $sequences - array reference of sequence strings
# Returns a hash reference whose keys are the flagged positions.
sub find_flagged_columns {
    my ($char, $sequences) = @_;
    my %is_flagged;

    for my $seq (@{$sequences}) {
        my $pos = index($seq, $char);
        while ($pos >= 0) {
            $is_flagged{$pos} = 1;
            $pos = index($seq, $char, $pos + 1);
        }
    }

    return \%is_flagged;
}

# Returns a copy of $seq without the characters at the flagged positions.
#   $seq        - sequence string
#   $is_flagged - hash reference as returned by find_flagged_columns()
sub strip_columns {
    my ($seq, $is_flagged) = @_;

    return join '',
        map  { substr($seq, $_, 1) }
        grep { !exists $is_flagged->{$_} } 0 .. length($seq) - 1;
}

# Entry point.
#   $in_file - path of the multi-FASTA file
#   $char    - the character whose columns must be removed
# Dies with a usage message when an argument is missing and with an
# explicit error when $char is not exactly one character (such a value
# could never match a single residue and would silently remove nothing).
sub main {
    my ($in_file, $char) = @_;

    die "Usage: $0 <input.fasta> <character>\n"
        unless defined($in_file) && defined($char);
    die "The character to remove must be exactly one character, got '$char'\n"
        unless length($char) == 1;

    my ($headers, $sequences) = read_fasta($in_file);
    my $is_flagged = find_flagged_columns($char, $sequences);

    for my $i (0 .. $#{$headers}) {
        print $headers->[$i], "\n",
            strip_columns($sequences->[$i], $is_flagged), "\n";
    }

    return;
}

# Run only when executed as a script, so that the subs above can be loaded
# with require/do (e.g. from a test) without side effects.
main(@ARGV) unless caller;

1;
<!--
    Galaxy tool wrapper for removeChar.pl.
    Inputs : a multi-FASTA alignment ("input") and the character ("char")
             whose columns are removed.
    Output : the cleaned multi-FASTA alignment ("output"), taken from the
             standard output of the script.
-->
<tool id="removechar" name="removeChar tool" version="1.0.0">
    <description>allows to remove positions (or columns) from a multi-Fasta alignment file in function of a given character</description>

    <!--<requirements>
        <requirement type="package" version="1.7.2">perl-bioperl</requirement>
    </requirements>-->

    <command detect_errors="aggressive"><![CDATA[

#import re
    ## Creates symlinks for each input file based on the Galaxy 'element_identifier'
    ## Used so that a human-readable name appears in the output table (instead of 'dataset_xyz.dat')
    ## Add single quotes around each input file identifier
    #set $_input_file = "'{}'".format($input.element_identifier)
    ln -s '${input}' ${_input_file} &&

    ## '$char' and '$output' are single-quoted so that the shell passes them
    ## verbatim: an unquoted value such as * or ? would be glob-expanded.
    perl '$__tool_directory__/removeChar.pl' $_input_file '$char' > '$output'

    ]]></command>
    <!-- perl '$__tool_directory__/nucleScore.pl' $_input_file > "$output" -->
    <!-- ./nuclescore.sh ${named_input_files} > "$output" -->

    <inputs>
        <param format="fasta" name="input" type="data" label="Multi-FASTA file: "/>
        <!-- Double quotes inside the help attribute must be written as &quot; to keep the document well-formed. -->
        <param name="char" type="text" area="false" value="N" label="Character to be removed from Multi-FASTA file:" help="Users can directly write the character to be removed without quotes (&quot;&quot; or '')" />
    </inputs>

    <outputs>
        <data format="fasta" name="output" />
    </outputs>

    <help><![CDATA[
removeChar.pl is a Perl script allowing to remove positions (or columns) of an aligned multi-Fasta file in function of a given character (eg. N).
The resulting multi-Fasta file corresponds to the same input multiFasta alignment file without the queried character.

This script belongs to the getSequenceInfo supplementary tools.

- GitHub: https://github.com/karubiotools/getSequenceInfo/tree/master/supplementary_tools
]]>
    </help>

</tool>