0
|
1 #!/bin/sh
|
|
2 ##convert gsMapper output into gff3/GVF format
|
|
3
|
|
4 #New Zealand Institute for Plant and Food Research
|
|
5 #This program is free software: you can redistribute it and/or modify
|
|
6 # it under the terms of the GNU General Public License as published by
|
|
7 # the Free Software Foundation, either version 3 of the License, or
|
|
8 # (at your option) any later version.
|
|
9 #
|
|
10 # This program is distributed in the hope that it will be useful,
|
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 # GNU General Public License for more details.
|
|
14 #
|
|
15 # You should have received a copy of the GNU General Public License
|
|
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17
|
|
18
|
|
# Usage: <this script> <gsMapper diffs file> <output GFF3/GVF file>
infile=$1
outfile=$2

#######################################
# Convert gsMapper variant summary lines into tab-separated GFF3/GVF-style
# feature records.
#
# Only input lines that start with ">" and whose 7th whitespace-separated
# field contains a "%" are converted; every other line is skipped.
# Fields read from each converted line:
#   $1 ">" + sequence ID, $2 start, $3 end, $4 reference allele,
#   $5 variant allele, $6 total read depth, $7 variant frequency in percent.
#
# Arguments: $1 - path of the gsMapper output file to read
#            $2 - path of the file to (over)write with the converted records
# Outputs:   records are written to $2; a usage message goes to stderr
# Returns:   2 when an argument is missing or empty, otherwise the status
#            of the awk invocation / output redirection
#######################################
gsmapper_to_gff() {
  if [ "$#" -ne 2 ] || [ -z "$1" ] || [ -z "$2" ]; then
    printf 'Usage: %s <gsmapper_diffs_file> <output_gff_file>\n' "${0##*/}" >&2
    return 2
  fi

  awk '
    BEGIN { OFS = "\t" }

    # sub() both filters and normalises: it returns 0 (record skipped) when
    # field 7 has no "%", otherwise it strips the "%" so $7 is numeric.
    /^>/ && sub(/%/, "", $7) {
      ID = substr($1, 2)                 # drop the leading ">"

      # A multi-character allele or a "-" placeholder on either side
      # marks an insertion/deletion; anything else is a single-base change.
      if (length($4) > 1 || index($4, "-") || length($5) > 1 || index($5, "-"))
        type = "indel"
      else
        type = "SNP"

      start = $2
      end = $3
      Col9_ID = ID ":gsmapper:" type ":" start

      Reference_seq = $4
      Variant_seq = $5
      Total_reads = $6

      # Recover the variant read count from depth and percentage, rounding
      # to the nearest whole read. Truncating instead undercounts when the
      # percentage was itself rounded (3 reads at 33% -> 0.99 -> 0 reads).
      Variant_reads = int(Total_reads * $7 / 100 + 0.5)

      attributes = "ID=" Col9_ID \
        ";Reference_seq=" Reference_seq \
        ";Variant_seq=" Variant_seq \
        ";Total_reads=" Total_reads \
        ";Variant_reads=" Variant_reads

      print ID, "gsmapper", type, start, end, ".", ".", ".", attributes
    }
  ' "$1" > "$2"
}

# The function status is the last status of the script, so a usage error
# or awk failure is reported to the caller.
gsmapper_to_gff "$infile" "$outfile"
|
|
43
|
|
44
|
|
45
|
|
46
|
|
47
|
|
48
|
|
49
|
|
50
|