0
|
1 #!/usr/bin/perl -w
|
|
2
|
|
3 ###############################################################
|
|
4 # cuffdiff_mds_plot.pl
|
|
5 # John Garbe
|
|
6 # Septmeber 2012
|
|
7 #
|
|
8 # Given a sample tracking file from cuffdiff2, convert it to
|
|
9 # the proper format for loading into R and generating an MDS plot
|
|
10 #
|
|
11 ################################################################
|
|
12
|
|
13 # check to make sure having correct files
|
|
14 my $usage = "usage: cuffdiff_mds_plot.pl [TABULAR.in] [TABULAR.out] [PLOT.png]\n";
|
|
15 die $usage unless @ARGV == 3;
|
|
16
|
|
17 #get the input arguments
|
|
18 my $inputFile = $ARGV[0];
|
|
19 my $outputFile = $ARGV[1];
|
|
20 my $plotFile = $ARGV[2];
|
|
21
|
|
22 #Open files
|
|
23 open (INPUT, "<", $inputFile) || die("Could not open file $inputFile \n");
|
|
24 open (OUTPUT, ">", $outputFile) || die("Could not open file $outputFile \n");
|
|
25 open (PLOT, ">", $plotFile) || die("Could not open file $plotFile \n");
|
|
26
|
|
27 # header looks like this:
|
|
28 # tracking_id condition replicate raw_frags internal_scaled_frags external_scaled_frags FPKM effective_length status
|
|
29 my $header = <INPUT>;
|
|
30
|
|
31 # read in the sample tracking file
|
|
32 while (<INPUT>) {
|
|
33 chomp;
|
|
34 @line = split /\t/;
|
|
35 $tracking_id{$line[0]} = 1;
|
|
36 $sample = $line[1] . "-" . $line[2];
|
|
37 $fpkm{$sample}{$line[0]} = $line[6];
|
|
38 }
|
|
39 close(INPUT);
|
|
40
|
|
41 @sorted_tracking_id = sort( keys(%tracking_id));
|
|
42
|
|
43 # print out header
|
|
44 foreach $tracking_id (@sorted_tracking_id) {
|
|
45 print OUTPUT "\t$tracking_id";
|
|
46 }
|
|
47 print OUTPUT "\n";
|
|
48
|
|
49 # print out data
|
|
50 foreach $sample (keys(%fpkm)) {
|
|
51 print OUTPUT "$sample";
|
|
52
|
|
53 foreach $tracking_id (@sorted_tracking_id) {
|
|
54 print OUTPUT "\t$fpkm{$sample}{$tracking_id}";
|
|
55 }
|
|
56
|
|
57 print OUTPUT "\n";
|
|
58 }
|
|
59 close(OUTPUT);
|
|
60
|
|
61 #variables to store the name of the R script file
|
|
62 my $r_script = "cuffinks2mdf.r";
|
|
63
|
|
64 open(Rcmd,">", $r_script) or die "Cannot open $r_script \n\n";
|
|
65 print Rcmd "
|
|
66 datat <- read.table(\"$outputFile\");
|
|
67 cmd <- cmdscale(dist(datat));
|
|
68 png(filename=\"$plotFile\");
|
|
69 plot(cmd[,1], cmd[,2], type=\"n\", ann=FALSE);
|
|
70 text(cmd[,1], cmd[,2], labels = rownames(datat));
|
|
71 title(main=\"Multidimensional Scaling Plot\");
|
|
72 title(xlab= \"Dimension 1\");
|
|
73 title(ylab= \"Dimension 2\");
|
|
74 dev.off();
|
|
75 #eof" . "\n";
|
|
76
|
|
77 close Rcmd;
|
|
78
|
|
79
|
|
80 system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
|
|
81 #close the input and output files
|
|
82 close(PLOT);
|
|
83
|