Mercurial > repos > abims-sbr > orthogroups_tool
diff scripts/format_transdecoder_headers.sh @ 0:d33ad52f59bc draft default tip
planemo upload for repository https://github.com/abims-sbr/adaptearch commit 3c7982d775b6f3b472f6514d791edcb43cd258a1-dirty
author | abims-sbr |
---|---|
date | Fri, 01 Feb 2019 10:25:10 -0500 |
parents | |
children |
line wrap: on
line diff
#!/bin/bash
#
# v2 - this script modifies the 'Orthogroups.txt' file in order to make it
# easily readable by the following script, filter_orthofinder.py
#
# Usage: format_transdecoder_headers.sh <input Orthogroups.txt> <output file>
#
# Example :
#   OG0000001: Gene.117__As119_1/1_1.000_543__g.117__m.117 Gene.157__As170_1/1_1.000_1203__g.157__m.157
# Becomes :
#   >As119_1/1_1.000_543 >As170_1/1_1.000_1203
#
# 'set -e' is deliberately not used: failures are reported through explicit
# return codes, and the status of the final 'main' call is the script's
# exit status.

# Prints a one-line usage message on stderr.
usage() {
  printf 'Usage: %s <input Orthogroups.txt> <output file>\n' "${0##*/}" >&2
}

#######################################
# Rewrites the sequence headers of an orthogroups file.
# Arguments:
#   $1 - path of the input file (read only)
#   $2 - path of the output file (created or overwritten)
# Returns:
#   0 on success, non-zero if sed fails or the output cannot be written.
#######################################
format_transdecoder_headers() {
  local input=$1
  local output=$2

  # The three substitutions run in order on every line in a single pass,
  # which yields the same result as three successive in-place edits:
  #   1. removes the first 'OGxxxxxxx: ' prefix of the line
  #   2. replaces every prefix like 'Gene.119__' with '>'
  #   3. removes every suffix like '__g.117__m.117'
  # [[:space:]] and a bare '_' are the portable spellings of '\s' and '\_'.
  sed -E \
    -e 's/OG[0-9]{7,}:[[:space:]]//' \
    -e 's/Gene\.[0-9]+__/>/g' \
    -e 's/__g\.[0-9]+__m\.[0-9]+//g' \
    -- "$input" > "$output"
}

# Entry point: validates the argument count, then runs the conversion.
# Arguments beyond the second are ignored.
main() {
  if (( $# < 2 )); then
    usage
    return 2
  fi
  format_transdecoder_headers "$1" "$2"
}

main "$@"

# Old version (kept for reference only)

# removes 'OGxxxxxxx '
#sed -E 's/OG[0-9]{7}:\s//' $1 > $2
# replace _+_ by (+) because '_' causes bugs
#sed -i 's/_+_/(+)/g' $2
# Replaces everything by '>'
#sed -i -E 's/m\.[0-9]{1,}[^()]+\(\+\)\s*/>/g' $2
# Removes terminal '(+)'
#sed -i 's/(+)//g' $2
# Removes last suite of unwanted numbers, underscore and dash
#sed -i -E 's/\_[0-9]{1,}-[0-9]{1,}//g' $2