annotate filter_columns.sh @ 0:e77c9484b2d0 draft default tip

Uploaded
author saskia-hiltemann
date Thu, 22 Oct 2015 09:18:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e77c9484b2d0 Uploaded
saskia-hiltemann
parents:
diff changeset
#!/bin/bash
#
# filter_columns.sh - split a tab-separated table into "removed" and
# "retained" rows according to one or more column filters.
#
# Usage:
#   filter_columns.sh --infile FILE --outfile_rem FILE --outfile_ret FILE
#                     [--filter COLUMN,OP[,VALUE]]...
#
# Filters are applied in the order given; each filter only sees the rows that
# survived the previous ones. A row that matches a filter is written to the
# "removed" output, every other row ends up in the "retained" output.
#
# The first line of the input (header), lines starting with '#', '<' or '>',
# and empty lines are never filtered: they are copied to both outputs.
#
# Operators: equals nequals contains ncontains empty nonempty lt le gt ge
#   (lt/le/gt/ge compare numerically, everything else compares as text).
#
# Requires util-linux (enhanced) getopt.

# Scratch directory; global so the EXIT trap can still see it after main returns.
WORKDIR=''

# Print the usage text to stderr and exit with status 2.
usage() {
  cat >&2 <<EOF
Usage: ${0##*/} --infile FILE --outfile_rem FILE --outfile_ret FILE
       [--filter COLUMN,OP[,VALUE]]...

  --infile FILE        tab-separated input file (first line is the header)
  --outfile_rem FILE   receives the rows removed by any filter
  --outfile_ret FILE   receives the rows that passed every filter
  --filter SPEC        COLUMN,OP[,VALUE]; may be given multiple times
                       OP: equals nequals contains ncontains empty nonempty
                           lt le gt ge
EOF
  exit 2
}

# Print an error message to stderr and exit with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Remove the scratch directory (installed as EXIT trap).
cleanup() {
  if [[ -n "$WORKDIR" ]]; then
    rm -rf -- "$WORKDIR"
  fi
}

#######################################
# Run a single filter pass over a table.
# Arguments:
#   $1 - pass number (1 for the first filter)
#   $2 - column number to test (awk coerces it to a number; 0 or a
#        non-numeric value selects the whole line, as in the original script)
#   $3 - operator name (already validated by the caller)
#   $4 - value to compare against (matched literally)
#   $5 - input file
#   $6 - file that removed rows are appended to
# Outputs:
#   Retained rows on stdout; removed rows appended to $6.
#######################################
apply_filter() {
  local pass=$1 column=$2 op=$3 value=$4 input=$5 removed=$6

  # Values travel through the environment rather than being spliced into the
  # awk program text, so quotes or backslashes in them cannot alter the program.
  FILTER_PASS=$pass FILTER_COLUMN=$column FILTER_OP=$op \
  FILTER_VALUE=$value FILTER_REMOVED=$removed \
  awk '
    BEGIN {
      FS = "\t"
      OFS = "\t"
      col        = ENVIRON["FILTER_COLUMN"] + 0
      op         = ENVIRON["FILTER_OP"]
      value      = ENVIRON["FILTER_VALUE"] ""   # "" forces string comparison
      removed    = ENVIRON["FILTER_REMOVED"]
      first_pass = (ENVIRON["FILTER_PASS"] == 1)
    }

    # Header, comment-like and empty lines bypass the filter. They are always
    # retained, but copied to the removed file on the first pass only so that
    # they are not repeated once per filter.
    FNR == 1 || /^[#<>]/ || NF == 0 {
      print
      if (first_pass) print >> (removed)
      next
    }

    {
      field = $col ""
      if      (op == "equals")    hit = (field == value)
      else if (op == "nequals")   hit = (field != value)
      else if (op == "contains")  hit = (index(field, value) != 0)
      else if (op == "ncontains") hit = (index(field, value) == 0)
      else if (op == "empty")     hit = (field == "")
      else if (op == "nonempty")  hit = (field != "")
      else if (op == "lt")        hit = (field + 0 <  value + 0)
      else if (op == "le")        hit = (field + 0 <= value + 0)
      else if (op == "gt")        hit = (field + 0 >  value + 0)
      else if (op == "ge")        hit = (field + 0 >= value + 0)
      else                        hit = 0

      if (hit) print >> (removed)
      else     print
    }
  ' < "$input"
}

main() {
  local infile='' outfile_rem='' outfile_ret=''
  local -a filters=()
  local parsed

  # Echo the invocation, as the original script did.
  printf '%s\n' "$*"

  # Must be checked before getopt runs: afterwards "--" is always present.
  (( $# > 0 )) || usage

  # Capture getopt output separately so that its exit status is actually
  # checked. Quoted output mode (no -u) keeps arguments containing whitespace
  # intact. -a accepts long options with a single dash; -h takes (and ignores)
  # an argument, as before.
  parsed=$(getopt -n "$0" -a \
    --longoptions 'infile:,outfile_rem:,outfile_ret:,filter:' \
    -o 'h:' -- "$@") || usage
  # getopt shell-quotes its output for exactly this use; this is the
  # documented way to load it back into the positional parameters.
  eval set -- "$parsed"

  while (( $# > 0 )); do
    case "$1" in
      --infile)
        infile=$2
        shift 2
        ;;
      --outfile_rem)
        outfile_rem=$2
        shift 2
        ;;
      --outfile_ret)
        outfile_ret=$2
        shift 2
        ;;
      --filter)
        filters+=("$2")
        shift 2
        ;;
      -h)
        shift 2
        ;;
      --)
        shift
        break
        ;;
      *)
        usage
        ;;
    esac
  done

  [[ -n "$infile" && -n "$outfile_rem" && -n "$outfile_ret" ]] || usage
  [[ -r "$infile" ]] || die "cannot read input file: $infile"

  # Private scratch directory instead of fixed file names in the current
  # directory: no stale data from earlier runs, no clashes between jobs.
  WORKDIR=$(mktemp -d) || die "cannot create temporary directory"
  trap cleanup EXIT

  local current="$WORKDIR/current"
  local retained="$WORKDIR/retained"
  local removed="$WORKDIR/removed"

  cp -- "$infile" "$current" || die "cannot copy input file: $infile"
  # Make sure the removed file exists even when no filter is given.
  : > "$removed" || die "cannot create scratch file"

  # Preview of the first data rows (input without its header line).
  sed -n -e '2,11p' -e '11q' < "$infile"

  local pass=0 spec column op value
  for spec in "${filters[@]}"; do
    pass=$((pass + 1))
    echo "filter $pass: $spec"

    # Split into three fields only: VALUE keeps any further commas.
    column='' op='' value=''
    IFS=',' read -r column op value <<< "$spec"

    echo "column: $column"
    echo "op : $op"
    echo "value: $value"

    case "$op" in
      equals | nequals | contains | ncontains | empty | nonempty | lt | le | gt | ge) ;;
      *) die "unknown operator '$op' in filter '$spec'" ;;
    esac

    apply_filter "$pass" "$column" "$op" "$value" "$current" "$removed" \
      > "$retained" || die "filter '$spec' failed"

    # The next filter only sees the rows retained so far.
    mv -- "$retained" "$current" || die "cannot update scratch file"
  done

  # Collapse adjacent duplicate lines, as the original script did.
  uniq < "$removed" > "$outfile_rem" || die "cannot write $outfile_rem"
  uniq < "$current" > "$outfile_ret" || die "cannot write $outfile_ret"
}

main "$@"