1
|
1 #!/usr/bin/perl
|
|
2
|
|
3 # load modules
|
|
use Getopt::Std;
use threads;
use Thread::Queue;
use threads::shared;
use File::Copy qw(copy);
use File::Temp qw(tempdir);
|
|
8
|
|
# Wall-clock start time, used for the run-time report at the end.
my $start_time = time;

# Command-line options:
#   -i : input file to search (mandatory)
#   -f : pattern file (with -t file) or the pattern itself (with -t single)
#   -o : output file (mandatory)
#   -t : pattern source type (file/single)
#   -I : case-insensitive matching
#   -P : Perl-based grep (boolean)
#   -A : number of extra lines to fetch after each match
my %opts;
getopts('i:f:o:t:PIA:', \%opts);

## variables read by the worker threads (sub grep / sub printout).
my $infile  :shared;
my $outfile :shared;
my $args    :shared;
my $tmpdir  :shared;

## nr of grep threads (if created), one extra is created for printing.
my $nrgrep = 6;

## validate every option before anything is created on disk, so that a bad
## invocation cannot leave a temporary directory behind.
if (!defined($opts{'i'})) { die('Input file is mandatory'); }
$infile = $opts{'i'};
if (!defined($opts{'o'})) { die('Output file is mandatory'); }
$outfile = $opts{'o'};
if (!defined($opts{'t'})) { die('Pattern source is mandatory (file/single)'); }
my $type = $opts{'t'};
if ($type ne 'file' && $type ne 'single') {
    die('only "file" and "single" are supported as value of the -t flag');
}
if (!defined($opts{'f'})) { die('Pattern (or pattern file) is mandatory (-f)'); }

## grep options: kept as a list for the direct (shell-free) grep call and as
## a string in $args for the worker threads, which build a shell command.
my @grep_opts;
push(@grep_opts, '-i') if defined($opts{'I'});
push(@grep_opts, '-P') if defined($opts{'P'});
if (defined($opts{'A'})) {
    # \z (not $) so that a value with a trailing newline is rejected too.
    if ($opts{'A'} !~ m/\A\d+\z/) {
        die("Invalid amount of lines to fetch provided (must be integer)");
    }
    push(@grep_opts, '-A', $opts{'A'});
}
$args = join(' ', @grep_opts);

## create tmp location. tempdir() picks an unused name and creates the
## directory in one step (and croaks on failure).
$tmpdir = tempdir('GrepFile.XXXXXX', DIR => '/tmp');
my $result_path = "$tmpdir/result.txt";

# Remove the temporary directory, then abort with the given message.
my $bail = sub {
    my ($message) = @_;
    system('rm', '-rf', $tmpdir);
    die($message);
};

if ($type eq 'single') {
    ## single pattern => direct processing.
    ## grep is started without a shell: -e protects patterns that start with
    ## a dash or contain quotes, -- protects unusual input file names.
    open(my $result_fh, '>', $result_path)
        or $bail->("Cannot write $result_path: $!");
    open(my $grep_fh, '-|', 'grep', @grep_opts, '-e', $opts{'f'}, '--', $infile)
        or $bail->("Cannot run grep: $!");
    while (my $line = <$grep_fh>) {
        print {$result_fh} $line;
    }
    close($grep_fh);
    # grep exits with 1 when nothing matched; only >1 (or a signal) is an error.
    if (($? & 127) || (($? >> 8) > 1)) {
        $bail->("grep failed on '$infile'");
    }
    close($result_fh) or $bail->("Cannot write $result_path: $!");
}
else {
    ## file based patterns => threaded processing.
    my $pattfile = $opts{'f'};

    ## copy infile to local system for speed
    copy($infile, "$tmpdir/infile")
        or $bail->("Cannot copy '$infile' to $tmpdir/infile: $!");
    mkdir("$tmpdir/pattfiles")
        or $bail->("Cannot create $tmpdir/pattfiles: $!");

    ## Write the pattern batches before any thread exists: every step that can
    ## fail happens while a plain die is still safe, and the pattern hash is
    ## released at the end of this scope instead of being cloned per thread.
    my $nr_batches = 0;
    {
        open(my $pat_fh, '<', $pattfile)
            or $bail->("Cannot read pattern file '$pattfile': $!");
        ## make sure patterns are unique
        my %seen;
        while (my $pattern = <$pat_fh>) {
            chomp($pattern);
            # An empty pattern matches every line for grep -f; skip blank lines.
            next if $pattern eq '';
            $seen{$pattern} = 1;
        }
        close($pat_fh);

        ## run in batches of 100 patterns.
        my @patterns = keys(%seen);
        while (my @subset = splice(@patterns, 0, 100)) {
            $nr_batches++;
            my $batch_path = "$tmpdir/pattfiles/$nr_batches";
            open(my $batch_fh, '>', $batch_path)
                or $bail->("Cannot write $batch_path: $!");
            print {$batch_fh} join("\n", @subset), "\n";
            close($batch_fh) or $bail->("Cannot write $batch_path: $!");
        }
    }

    ## start the workers; the queues are package variables because
    ## sub grep and sub printout refer to them by name.
    $grepqueue  = Thread::Queue->new();
    $printqueue = Thread::Queue->new();
    my @workers;
    for my $n (1 .. $nrgrep) {
        my $worker = threads->create('grep');
        push(@workers, $worker);
    }
    my $printer = threads->create('printout');

    ## hand out the batches, followed by one stop marker (undef) per worker.
    for my $batch_id (1 .. $nr_batches) {
        $grepqueue->enqueue($batch_id);
    }
    for my $n (1 .. $nrgrep) {
        $grepqueue->enqueue(undef);
    }
    for my $worker (@workers) {
        $worker->join();
    }

    ## all results are queued by now: stop the printer as well.
    $printqueue->enqueue(undef);
    $printer->join();
    if (defined(my $print_error = $printer->error())) {
        $bail->("Writing results failed: $print_error");
    }
}

## deliver the result. When nothing matched, the printer thread may never
## have created result.txt; an empty output file is produced in that case.
if (-e $result_path) {
    copy($result_path, $outfile)
        or $bail->("Cannot write output file '$outfile': $!");
}
else {
    open(my $empty_fh, '>', $outfile)
        or $bail->("Cannot write output file '$outfile': $!");
    close($empty_fh);
}

system('rm', '-rf', $tmpdir);

##################
# PRINT RUN-TIME #
##################
my $elapsed = time - $start_time;
printf("\n\nRunning time:%02d:%02d:%02d\n", int($elapsed/3600), int(($elapsed % 3600)/60), int($elapsed % 60));
|
|
133
|
|
# Worker-thread body (started by name through threads->create('grep')).
#
# Takes pattern-batch numbers from $grepqueue until it receives undef, runs
# the system grep for each batch against the local copy of the input file,
# and puts the matching text on $printqueue.
sub grep {
    # thread-local copies of the shared settings
    my $largs = defined($args) ? $args : '';
    my $ltmp  = $tmpdir;

    while (defined(my $idx = $grepqueue->dequeue())) {
        # The second grep drops the "--" separator lines that grep prints
        # between groups of context lines when -A is in effect.
        my $command = "grep $largs -f '$ltmp/pattfiles/$idx' '$ltmp/infile' | grep -v '^--\$'";
        my $out = `$command`;

        # Backticks yield undef when the command could not be started. undef
        # is the stop marker of the print queue, so it must never be
        # forwarded: that would end the printer thread early and silently
        # discard the results of all later batches.
        if (!defined($out)) {
            warn("grep for pattern batch $idx could not be started: $!\n");
            next;
        }
        $printqueue->enqueue($out);
    }
}
|
|
145
|
|
# Printer-thread body (started by name through threads->create('printout')).
#
# Takes result chunks from $printqueue until it receives undef and appends
# every non-empty chunk to $tmpdir/result.txt.
sub printout {
    my $result_path = "$tmpdir/result.txt";

    # Open once, up front: the result file then exists even when no batch
    # produced any match, and an unwritable location is reported instead of
    # being silently ignored. Buffering is left to Perl's I/O layer.
    open(my $result_fh, '>>', $result_path)
        or die("Cannot open $result_path for appending: $!");

    while (defined(my $result = $printqueue->dequeue())) {
        # chunks without any match carry nothing worth writing
        next if $result eq '' || $result eq "\n";
        print {$result_fh} $result;
    }

    # Buffered write errors only surface when the handle is closed.
    close($result_fh)
        or die("Cannot write $result_path: $!");
}
|
|
168
|