Mercurial > repos > devteam > categorize_elements_satisfying_criteria
comparison categorize_elements_satisfying_criteria.pl @ 0:586c1f0e1515 draft default tip
Uploaded tool tarball.
author | devteam |
---|---|
date | Wed, 25 Sep 2013 10:03:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:586c1f0e1515 |
---|---|
#!/usr/bin/perl -w

# The program takes as input a set of categories, such that each category contains many elements.
# It also takes a table relating elements with criteria, such that each element is assigned a number
# representing the number of times the element satisfies a certain criterion.
# The first input is a TABULAR format file, such that the left column represents the names of categories
# and all other columns represent the names of elements.
# The second input is a TABULAR format file relating elements with criteria, such that the first line
# represents the names of criteria and the left column represents the names of elements.
# The output is a TABULAR format file relating categories with criteria, such that each category is
# assigned a number representing the total number of times its elements satisfy a certain criterion.
# Each category is assigned as many numbers as criteria.
#
# Notes on behaviour:
#   * An element that is listed under several categories is counted only for the LAST
#     category that lists it.
#   * If the same category name appears on several lines, every occurrence gets an output
#     row, but all counts are accumulated in the row of the first occurrence.
#   * Elements of the second input that belong to no category are ignored.
#   * Missing, empty or non-numeric cells, and cells that are not greater than zero,
#     contribute nothing to the totals.

use strict;
use warnings;

use Scalar::Util qw(looks_like_number);

# Run as a script only; when the file is loaded with require/do the subs are just defined.
main(@ARGV) unless caller;

# Entry point: main(CATEGORIES_FILE, ELEMENTS_DATA_FILE, OUTPUT_FILE).
# Dies with the usage message unless exactly three arguments are given, and dies with the
# system error text if a file cannot be opened or the output cannot be written.
sub main {
    my @args = @_;

    # check to make sure having correct files
    my $usage = "usage: categorize_elements_satisfying_criteria.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
    die $usage unless @args == 3;

    my ($categories_inputFile, $elements_data_inputFile, $categorized_data_outputFile) = @args;

    # open the input and output files
    open(my $categories_fh, '<', $categories_inputFile)
        or die "Could not open file $categories_inputFile: $!\n";
    open(my $elements_fh, '<', $elements_data_inputFile)
        or die "Could not open file $elements_data_inputFile: $!\n";
    open(my $output_fh, '>', $categorized_data_outputFile)
        or die "Could not open file $categorized_data_outputFile: $!\n";

    my ($categories, $row_of_element) = read_categories($categories_fh);
    my ($criteria, $counts) = tally_criteria($elements_fh, $categories, $row_of_element);
    write_table($output_fh, $categories, $criteria, $counts);

    # buffered write errors only surface at close time, so check it
    close($output_fh) or die "Could not write file $categorized_data_outputFile: $!\n";
    close($elements_fh);
    close($categories_fh);

    return;
}

# Read the categories table from the filehandle $fh.
# Returns two references:
#   - an array of category names, one per non-blank input line, in input order;
#   - a hash mapping each element name to the index (in that array) of the row
#     that accumulates its counts.
sub read_categories {
    my ($fh) = @_;

    my @categories;
    my %first_row_of_category;
    my %row_of_element;

    while (my $line = <$fh>) {
        # strip LF and CRLF line endings alike, so the last element name stays clean
        $line =~ s/\r?\n\z//;

        my ($category, @elements) = split /\t/, $line;

        # split returns nothing for a blank (or tabs-only) line: there is no category to record
        next unless defined $category;

        push @categories, $category;

        # a repeated category name keeps accumulating in the row of its first occurrence
        $first_row_of_category{$category} = $#categories
            unless exists $first_row_of_category{$category};
        my $row = $first_row_of_category{$category};

        # a later category overrides an earlier one for the same element
        $row_of_element{$_} = $row for @elements;
    }

    return (\@categories, \%row_of_element);
}

# Read the element/criteria table from the filehandle $fh and total the values per category.
#   $categories     - array reference as returned by read_categories
#   $row_of_element - hash reference as returned by read_categories
# The first line holds the criteria names; every following line holds an element name
# followed by one value per criterion.
# Returns two references:
#   - an array of criteria names;
#   - an array with one row per category, each row being an array of totals,
#     one per criterion.
sub tally_criteria {
    my ($fh, $categories, $row_of_element) = @_;

    # the first line is always the header, even when it is empty
    my @criteria;
    my $header = <$fh>;
    if (defined $header) {
        $header =~ s/\r?\n\z//;
        @criteria = split /\t/, $header;
    }

    # one zero-initialised counter per (category, criterion) pair
    my @counts = map { [ (0) x @criteria ] } @{$categories};

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;

        my ($element, @values) = split /\t/, $line;
        next unless defined $element;

        # elements that belong to no category must not be counted anywhere
        my $row = $row_of_element->{$element};
        next unless defined $row;

        for my $criterion (0 .. $#criteria) {
            my $value = $values[$criterion];

            # short rows and non-numeric cells contribute nothing
            next unless defined $value && looks_like_number($value) && $value > 0;

            $counts[$row][$criterion] += $value;
        }
    }

    return (\@criteria, \@counts);
}

# Write the result table to the filehandle $fh: a header line made of an empty first cell
# followed by the criteria names, then one line per category holding its name and totals.
# Dies if a write fails.
sub write_table {
    my ($fh, $categories, $criteria, $counts) = @_;

    # the leading tab leaves the top-left cell (above the category names) empty
    print {$fh} "\t" . join("\t", @{$criteria}) . "\n"
        or die "Could not write the criteria names: $!\n";

    for my $row (0 .. $#{$categories}) {
        print {$fh} join("\t", $categories->[$row], @{ $counts->[$row] }) . "\n"
            or die "Could not write the data of category $categories->[$row]: $!\n";
    }

    return;
}