Mercurial > repos > artbio > mircounts
diff format_fasta_hairpins.sh @ 0:da29af78a960 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/mircounts commit d4d8106d66b65679a1a685ab94bfcf99cdb7b959
author | artbio |
---|---|
date | Mon, 24 Jul 2017 06:27:50 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env bash
# format_fasta_hairpins.sh
#
# Usage: format_fasta_hairpins.sh GENOME_KEY
#
# Decompresses ./hairpin.fa.gz and rewrites ./hairpin.fa so that it is
# ready for bowtie-build:
#   * U/u in sequence lines is replaced by T/t;
#   * each header is cut at its first space, leaving the mir name as a
#     single word;
#   * each sequence is written on a single line;
#   * only records whose header starts with ">GENOME_KEY-" are kept.
#
# Arguments:
#   $1  GENOME_KEY - prefix of the mir names to keep. It is compared as a
#       literal string, not as a regular expression.
# Files:
#   reads and removes ./hairpin.fa.gz (in-place gunzip), writes ./hairpin.fa
# Exit status:
#   0 on success (hairpin.fa is empty when no header matches),
#   2 when GENOME_KEY is missing, non-zero when any step fails.

# Stop at the first failing command or unset variable. Kept to the portable
# `set -eu` subset: the script contains no pipeline, and the shell used by
# the caller to run it is not visible from this file.
set -eu

GENOME_KEY=${1:-}
if [ -z "$GENOME_KEY" ]; then
  printf 'Usage: %s GENOME_KEY\n' "${0##*/}" >&2
  exit 2
fi

# Produces hairpin.fa and removes hairpin.fa.gz.
gunzip hairpin.fa.gz

# The result is built in a scratch file and moved into place only once awk
# has succeeded, so a failure never leaves a half-written hairpin.fa. The
# trap removes the scratch file on every exit path.
tmp_fasta=hairpin.fa.tmp
trap 'rm -f -- "$tmp_fasta"' EXIT

# Single pass over the FASTA file: "keep" tells whether the current record
# matches the requested prefix, "seq" accumulates its sequence lines.
awk -v prefix=">${GENOME_KEY}-" '
  /^>/ {
    if (keep) print seq               # close the previous kept record
    header = $0
    sub(/ .+/, "", header)            # mir name only, as a one-word header
    keep = (index(header, prefix) == 1)
    seq = ""
    if (keep) print header
    next
  }
  keep {
    line = $0
    gsub(/u/, "t", line)              # RNA to DNA alphabet, case preserved
    gsub(/U/, "T", line)
    seq = seq line                    # join wrapped sequence lines
  }
  END { if (keep) print seq }
' < hairpin.fa > "$tmp_fasta"

mv -- "$tmp_fasta" hairpin.fa