Galaxy |

Changeset 2:311a4f16c483 (2017-02-27)

Previous changeset 1:7eab80f86779 (2017-02-27) Next changeset 3:5f670146a9af (2017-02-27)

Commit message:
Deleted selected files

removed:
src/README
src/cent_adjust.c
src/find_connected.c
src/flock1.c

diff -r 7eab80f86779 -r 311a4f16c483 src/README
--- a/src/README Mon Feb 27 13:26:09 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,139 +0,0 @@
-This package contains R code for converting and transforming a binary
-FCS file, using the FCSTrans software, and the C code for running the
-flock1 and flock2 population identification software.
-
-src - contains the C code for flock1, flock2 and cent_adjust
-bin - contains the compiled C code for flock1, flock2 and cent_adjust,
-      plus the R code for using the FCSTrans algorithm for file
-      conversion and data transformation.
-doc - documentation for FCSTrans and FLOCK algorithms
-example - sample data and output from FCSTrans and FLOCK
-
-To run this software sucessfully the code assumes the software was installed
-in the /usr/local/flock directory and that R, plus the Bioconductor flowCore
-module has been installed. If you use the ipconvert.sh shell script, you
-may need to adjust the location of the RScript executable and the location
-of the FCSTrans.R code by editing the shell script.
-
-#############################################################################
-# Overview
-#############################################################################
-ImmPort-FLOCK
-
-FLOCK (FLOw Clustering without K), an automated population discovery tool for
-multidimensional FCM data was designed to specifically take into account the
-unique feature of FCM data and produce objective segregation of cell
-populations.
-
-FLOCK parameter settings can be customized by defining Bins and Density
-Threshold.  The number of bins is an integer specifying the number of
-equal-sized regions the data will be partitioned into on each axis. Increasing
-the number of bins increases the sensitivity to detect rare populations but
-may also result in single populations being divided.  Density Threshold is the
-cut-off value to separate the dense regions from background. It is a floating
-point number that helps define population centers; increasing the threshold
-may help separate major populations but could cause the algorithm to overlook
-rare populations.
-
-#############################################################################
-# Compiling C code
-#############################################################################
-cd bin
-cc -o flock1 ../src/flock1.c ../src/find_connected.c -lm
-cc -o flock2 ../src/flock2.c -lm
-cc -o cent_adjust ../src/cent_adjust.c -lm
-
-#############################################################################
-# FCSTrans
-#############################################################################
-A shell script named ipconvert.sh is included that runs the FCSTrans R
-code for converting and transforming a binary FCS file. The output consists
-of one text file containing the transformed channel intensity values and
-another file containing a list of the FCS parameters.
-
-cd bin
-/usr/local/flock/bin/ipconvert.sh ../example/data/FCS2.fcs
-/usr/local/flock/bin/ipconvert.sh ../example/data/FCS3.fcs
-
-#############################################################################
-# Running flock1 or flock2
-#############################################################################
-Running the FLOCK1 and FLOCK2 algorithms generate 8 output files that have
-generic file names. For this reason, it is recommended that one output
-directory be created for one input file to the program.
-
-cd example/output/FCS2
-/usr/local/flock/bin/flock1 ../../data/FCS2.txt
-
-cd example/output/FCS3
-/usr/local/flock/bin/flock2 ../../data/FCS3.txt
-
-Files created: MFI.txt, percentage.txt, population_id.txt, profile.txt,
-               flock_results.txt, coordinates.txt, population_center.txt
-               and percentage.txt
-
-Usage Information for FLOCK1
-----------------------------------------------------------------------------
-basic mode: flock1 fcs.txt
-advanced_ mode: flock1 fcs.txt num_bin density_index max_num_pop
-
-Usage Information for FLOCK2
-----------------------------------------------------------------------------
-basic mode: flock data_file
-advanced mode 0 (specify maximum # of pops): flock data_file max_num_pop
-advanced mode 1 (without # of pops): flock data_file num_bin density_index
-advanced mode 2 (specify # of pops): flock data_file num_bin density_index
-                                           number_of_pop
-advanced mode 3 (specify both # of pops): flock data_file num_bin density_index
-                                           number_of_pop max_num_pop
-
-FLOCK Output Files
-----------------------------------------------------------------------------
-coordinates.txt:
-    Output is the intensity values for each marker and event
-
-flock_results.txt:
-    A combination of the input file, event identifiers and population
-    identifiers.
-
-MFI.txt:
-    Provides the mean fluorescence intensity for each population for each
-    marker/parameter
-
-population_id.txt:
-    Contains population identifiers (i.e, values from [1 to n] where n is
-    the population assigned to the  corresponding events in the input data
-    file, one identifier per row.)
-
-population_center.txt:
-    Contains the centroid coordinates for each identified population
-
-percentage.txt:
-    Includes the population identifiers and percentage of events within that
-    population (relative to the whole data file)
-
-profile.txt:
-    Displays an expression profile, where the approximate expression level
-    for each marker is assigned a numeric value from 1-4, for each identified
-    population
-
-fcs_properties.txt:
-    Contains the number of events, number of populations, and number of
-    markers, as well as the algorithm parameters used in the  analysis
-
-#############################################################################
-# Running cent_adjust
-#############################################################################
-Running the cent_adjust algorithm generates 4 output files that have
-generic file names. For this reason, it is recommened that one output
-directory be created for one input file to the program.
-
-mkdir example/output/FCS2/cent_adjust
-cd example/output/FCS2/cent_adjust
-/usr/local/flock/bin/cent_adjust ../population_center.txt ../coordinates.txt
-
-Files created: MFI.txt, percentage.txt, population_id.txt and profile.txt
-
-Usage Information for cent_adjust
-----------------------------------------------------------------------------
-basic mode: cent_adjust input_center input_data_file

diff -r 7eab80f86779 -r 311a4f16c483 src/cent_adjust.c
--- a/src/cent_adjust.c Mon Feb 27 13:26:09 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,1009 +0,0 @@\n-/////////////////////////////////////////////////////////\n-// Cent_adjust version number and modification history\n-// ImmPort BISC project\n-// Author: Yu "Max" Qian\n-// v1.01: Oct 16, 2009\n-// Line 899 of the main function:\n-// Changed kmean_term=1 to kmean_term=2\n-//////////////////////////////////////////////////////////\n-\n-\n-#include <time.h>\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <math.h>\n-#include <string.h>\n-\n-#define DEBUG 0\n-#define LINE_LEN 1024\n-#define FILE_NAME_LEN 128\n-#define PARA_NAME_LEN 64\n-#define MAX_VALUE 1000000000\n-#define CUBE 0\n-\n-\n-void getctrfileinfo(FILE *f_src_ctr, long *num_clust)\n-{\n-\tint ch=\'\\n\';\n-\tint prev=\'\\n\';\n-\tlong num_rows=0;\n-\n-\twhile ((ch = fgetc(f_src_ctr))!= EOF )\n- {\n-\t\tif (ch == \'\\n\')\n- {\n-\t\t\t++num_rows;\n- }\n-\t\tprev = ch;\n- }\n-\tif (prev!=\'\\n\')\n-\t\t++num_rows;\n-\t\n-\t*num_clust=num_rows;\n-\t//printf("center file has %ld rows\\n", *num_clust);\n-}\n-\n-/************************************* Read basic info of the source file **************************************/\n-void getfileinfo(FILE *f_src, long *file_Len, long *num_dm, char *name_string, int *time_ID)\n-{\n-\tchar src[LINE_LEN];\n-\tchar current_name[64];\n-\tchar prv;\n-\n-\tlong num_rows=0;\n-\tlong num_columns=0;\n-\tint ch=\'\\n\';\n-\tint prev=\'\\n\';\n-\tlong time_pos=0;\n-\tlong i=0;\n-\tlong j=0;\n-\n-\t\n-\n-\tsrc[0]=\'\\0\';\n-\tfgets(src, LINE_LEN, f_src);\n-\n-\tname_string[0]=\'\\0\';\n-\tcurrent_name[0]=\'\\0\';\n-\tprv=\'\\n\';\n-\n-\twhile ((src[i]==\' \') || (src[i]==\'\\t\')) //skip space and tab characters\n-\t\ti++;\n-\n-\twhile ((src[i]!=\'\\r\') && (src[i]!=\'\\n\')) //repeat until the end of the line\n-\t{\n-\t\tcurrent_name[j]=src[i];\n-\t\t\n-\t\tif ((src[i]==\'\\t\') && (prv!=\'\\t\')) //a complete word\n-\t\t{\n-\t\t\tcurrent_name[j]=\'\\0\';\n-\t\t\t\n- /* \n- * Commented out John Campbell, June 10 2010\n- * We no longer want to automatically remove Time column.\n- * This column should have been removed by column selection\n- if (0!=strcmp(current_name,"Time"))\n- {\n- num_columns++; //num_columns does not inlcude the column of Time\n- time_pos++;\n- strcat(name_string,current_name); \n- strcat(name_string,"\\t");\n- }\n- else\n- {\n- *time_ID=time_pos;\n- }\n- */\n-\n- num_columns++;\n- strcat(name_string,current_name);\n- strcat(name_string,"\\t");\n-\n-\t\t current_name[0]=\'\\0\';\n-\t\t j=0;\t\t\t\n-\t\t}\t\t\n-\t\t\n-\t\tif ((src[i]==\'\\t\') && (prv==\'\\t\')) //a duplicate tab or space\n-\t\t{\n-\t\t\tcurrent_name[0]=\'\\0\';\n-\t\t\tj=0;\n-\t\t}\n-\t\t\n- if (src[i]!=\'\\t\')\n-\t j++;\n-\n-\t\t\n-\t\tprv=src[i];\n-\t\ti++;\n-\t}\n-\t\n-\tif (prv!=\'\\t\') //the last one hasn\'t been retrieved\n-\t{\n-\t\tcurrent_name[j]=\'\\0\';\n- /* \n- * Commented out John Campbell, June 10 2010\n- * We no longer want to automatically remove Time column.\n- * This column should have been removed by column selection\n- if (0!=strcmp(current_name,"Time"))\n- {\n- num_columns++;\n- strcat(name_string,current_name);\n- time_pos++;\n- }\n- else\n- {\n- *time_ID=time_pos;\n- }\n- */\n-\n- num_columns++;\n- strcat(name_string,current_name);\n-\t}\n-\n-\tif (DEBUG==1)\n-\t{\n-\t\tprintf("time_ID is %d\\n",*time_ID);\n-\t\tprintf("name_string is %s\\n",name_string);\n-\t}\n-\n-\t// # of rows\n-\n-\twhile ((ch = fgetc(f_src))!= EOF )\n- {\n-\t\tif (ch == \'\\n\')\n- {\n-\t\t\t++num_rows;\n- }\n-\t\tprev = ch;\n- }\n-\tif (prev!=\'\\n\')\n-\t\t++num_rows;\n-\t\n-\t*file_Len=num_rows;\n-\t*num_dm=num_columns; \n-\n-\t//printf("original file size is %ld; number of dimensions is %ld\\n", *file_Len, *num_dm);\n-}\n-\n-\n-\n-///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n-/************************************* Read the source file into uncomp_data **************************************/\n-void readsource(FILE *f_src, lo'..b'0;\n-\tlong j=0;\n-\n-\tlong *cluster_id;\n-\tlong *IDmapping; //this is to keep the original populationID of the center.txt\n-\n-\tdouble kmean_term=0;\n-\t\n-\tdouble **cluster_center;\n-\tdouble **orig_data;\n-\tdouble **normalized_data;\n-\t\t\n-/*\n-\t_strtime( tmpbuf );\n- printf( "Starting time:\\t\\t\\t\\t%s\\n", tmpbuf );\n-\t_strdate( tmpbuf );\n- printf( "Starting date:\\t\\t\\t\\t%s\\n", tmpbuf );\n-*/\n-\n-\tif (argc!=3)\n-\t{\n-\t\tprintf("usage: cent_adjust input_center input_data_file\\n"); \n-\t\texit(0);\n-\t}\t\n-\t\n-\n-\tf_src_ctr=fopen(argv[1],"r");\t\n-\t\n-\t//read source data\n-\tf_src=fopen(argv[2],"r");\n-\t\n-\tgetfileinfo(f_src, &file_Len, &num_dm, name_string, &time_id); //get the filelength, number of dimensions, and num/name of parameters\n-\n-\trewind(f_src); //reset data file pointer\t\n-\n-\torig_data = (double **)malloc(sizeof(double*)*file_Len);\n-\tmemset(orig_data,0,sizeof(double*)*file_Len);\n-\tfor (i=0;i<file_Len;i++)\n-\t{\n-\t\torig_data[i]=(double *)malloc(sizeof(double)*num_dm);\n-\t\tmemset(orig_data[i],0,sizeof(double)*num_dm);\n-\t}\n-\t\n-\treadsource(f_src, file_Len, num_dm, orig_data, time_id); //read the data;\n-\t\n-\tfclose(f_src);\n-\t/////////////////////////////////////////////////////////////////////////////\n-\tgetctrfileinfo(f_src_ctr, &num_clust); //get how many populations\n-\tnorm_used=0;\n-\tdist_used=0;\n-\tkmean_term=2; //modified on Oct 16, 2009: changed kmean_term=1 to kmean_term=2\n-\n-\trewind(f_src_ctr); //reset center file pointer\n-\n-\t//read population center\n-\tcluster_center=(double **)malloc(sizeof(double*)*num_clust);\n-\tmemset(cluster_center,0,sizeof(double*)*num_clust);\n-\tfor (i=0;i<num_clust;i++)\n-\t{\n-\t\tcluster_center[i]=(double*)malloc(sizeof(double)*num_dm);\n-\t\tmemset(cluster_center[i],0,sizeof(double)*num_dm);\n-\t}\n-\tfor (i=0;i<num_clust;i++)\n-\t\tfor (j=0;j<num_dm;j++)\n-\t\t\tcluster_center[i][j]=0;\n-\n-\tIDmapping=(long *)malloc(sizeof(long)*num_clust);\n-\tmemset(IDmapping,0,sizeof(long)*num_clust);\n-\n-\treadcenter(f_src_ctr,num_clust,num_dm,cluster_center,IDmapping); //read population center\n- fclose(f_src_ctr);\n-\n-\t/////////////////////////////////////////////////////////////////////////////\n-\tnormalized_data=(double **)malloc(sizeof(double*)*file_Len);\n-\tmemset(normalized_data,0,sizeof(double*)*file_Len);\n-\tfor (i=0;i<file_Len;i++)\n-\t{\n-\t\tnormalized_data[i]=(double *)malloc(sizeof(double)*num_dm);\n-\t\tmemset(normalized_data[i],0,sizeof(double)*num_dm);\n-\t}\n-\t\n-\ttran(orig_data, file_Len, num_dm, norm_used, normalized_data);\n-\t/************************************************* Compute number of clusters *************************************************/\n-\t\n-\tcluster_id=(long*)malloc(sizeof(long)*file_Len);\n-\tmemset(cluster_id,0,sizeof(long)*file_Len);\n-\n-\tassign_event(normalized_data,num_clust,dist_used,kmean_term,file_Len,num_dm,cluster_id,cluster_center,0);\n-\n-\t\n-\t//show(orig_data,cluster_id,file_Len,num_clust,num_dm,show_data,num_disp,name_string); \n-\tshow(orig_data, cluster_id, file_Len, num_clust, num_dm, name_string, IDmapping);\n-\n-\tf_cid=fopen("population_id.txt","w");\n-\n-\tfor (i=0;i<file_Len;i++)\n-\t\tfprintf(f_cid,"%ld\\n",IDmapping[cluster_id[i]]);\n-\t\t\n-\n-\tfclose(f_cid);\n- \n-\t//added April 16, 2009\n-\tf_mfi=fopen("MFI.txt","w");\n-\n-\tfor (i=0;i<num_clust;i++)\n-\t{\n-\t\tfprintf(f_mfi,"%ld\\t",IDmapping[i]);\n-\n-\t\tfor (j=0;j<num_dm;j++)\n-\t\t{\n-\t\t\tif (j==num_dm-1)\n-\t\t\t\tfprintf(f_mfi,"%.0f\\n",cluster_center[i][j]);\n-\t\t\telse\n-\t\t\t\tfprintf(f_mfi,"%.0f\\t",cluster_center[i][j]);\n-\t\t}\n-\t}\n-\tfclose(f_mfi);\n-\n-\t//ended April 16, 2009\n-\n-\tfor (i=0;i<num_clust;i++)\n-\t\tfree(cluster_center[i]);\n-\tfree(cluster_center);\n-\t\t\n-\n-\t/********************************************** Release memory ******************************************/\n- \n-\tfor (i=0;i<file_Len;i++)\n-\t{\n-\t\tfree(orig_data[i]);\t\t\n-\t\tfree(normalized_data[i]);\n-\t}\n-\t\n-\tfree(orig_data);\n-\tfree(normalized_data);\n-\tfree(cluster_id);\n-\tfree(IDmapping);\n-\n-/*\n-\t_strtime( tmpbuf );\n- printf( "Ending time:\\t\\t\\t\\t%s\\n", tmpbuf );\n-\t_strdate( tmpbuf );\n- printf( "Ending date:\\t\\t\\t\\t%s\\n", tmpbuf );\n-*/\n-\n-}\n'

diff -r 7eab80f86779 -r 311a4f16c483 src/find_connected.c
--- a/src/find_connected.c Mon Feb 27 13:26:09 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

@@ -1,176 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-
-//static const char *rcsid = "$Id: find_connected.c,v 1.1 2008/09/05 21:54:40 rpl Exp $";
-
-int find_connected(int **G, int num_dense_grids, int ndim, int *grid_clusterID);
-void depth_first(int startnode);
-
-void bail(const char *);        /* exits via abort */
-static void check_clusters(int *gcID, int ndense);
-static void merge_cluster(int from, int into);
-
-
-/* Vars that will not change througout the depth-first recursion.  We
-   store them here to avoid endless replication on the stack. */
-static int **Gr=0;
-static int *gcID = 0;          /* grid cluster IDs */
-static int *cluster_count=0;   /* count of nodes per cluster */
-static int ndense=0;
-static int ndim=0;
-/* cid changes between depth-first searches, but is constant within a
-   single search, so it goes here. */
-static int cid=0;
-
-/* Find connected components in the graph of neighboring grids defined in G.
- *
- * Output:
- *
- * grid_clusterID[]  -- cluster to which each dense grid was assigned
- * return value      -- number of clusters assigned.
- */
-int find_connected(int **G, int n_dense_grids, int num_dm, int *grid_clusterID)
-{
-  int nclust=0;                  /* number of clusters found */
-  int i;
-  int *subfac;
-  int subval=0,nempty=0;
-    int clustid=0;
-
-  size_t sz = n_dense_grids*sizeof(int);
-  cluster_count = malloc(sz);
-  if(!cluster_count)
-    bail("find_connected: Unable to allocate %zd bytes.\n");
-  memset(cluster_count,0,sz);
-
-  /* set up the statics that will be used in the DFS */
-  Gr=G;
-  gcID = grid_clusterID;
-  ndense = n_dense_grids;
-  ndim = num_dm;
-
-
-  for(i=0;i<ndense;++i)
-    grid_clusterID[i] = -1;
-
-  for(i=0;i<ndense;++i) {
-    if(grid_clusterID[i] < 0) {  /* grid hasn't been assigned yet */
-      cid = nclust++;
-      depth_first(i);
-    }
-  }
-
-#ifndef NDEBUG
-  check_clusters(gcID,ndense);
-#endif
-
-  /* At this point we probably have some clusters that are empty due to merging.
-     We want to compact the cluster numbering to eliminate the empty clusters. */
-
-  subfac = malloc(sz);
-  if(!subfac)
-    bail("find_connected: Unable to allocate %zd bytes.\n");
- subval=0;
- nempty=0;
-
-  /* cluster #i needs to have its ID decremented by 1 for each empty cluster with
-     ID < i.  Precaclulate the decrements in this loop: */
-  for(i=0;i<nclust;++i) {
-    //clustid = grid_clusterID[i];
-    if(cluster_count[i] == 0) {
-      subval++;
-      nempty++;
-    }
-    subfac[i] = subval;
-  }
-
-  /* Now apply the decrements to all of the dense grids */
-  for(i=0;i<ndense;++i) {
-   clustid = grid_clusterID[i];
-    grid_clusterID[i] -= subfac[clustid];
-  }
-
-#ifndef NDEBUG
-  //  check_clusters(gcID,ndense);
-#endif
-
-  /* correct the number of clusters found */
-  nclust -= nempty;
-
-  return nclust;
-}
-
-
-/* Do a depth-first search for a single connected component in graph
- * G.  Start from node, tag the nodes found with cid, and record
- * the tags in grid_clusterID.  Also, record the node count in
- * cluster_count.  If we find a node that has already been assigned to
- * a cluster, that means we're merging two clusters, so zero out the
- * old cid's node count.
- *
- * Note that our graph is constructed as a DAG, so we can be
- * guaranteed to terminate without checking for cycles.
- *
- * Note2: this function can potentially recurse to depth = ndense.
- * Plan stack size accordingly.
- *
- * Output:
- *
- * grid_clusterID[] -- array where we tag the nodes.
- * cluster_count[]  -- count of the number of nodes per cluster.
- */
-
-void depth_first(int node)
-{
-  int i;
-
-  if(gcID[node] == cid)         // we're guaranteed no cycles, but it is possible to reach a node
-    return;                     // through two different paths in the same cluster.  This early
-                                // return saves us some unneeded work.
-
-  /* Check to see if we are merging a cluster */
-  if(gcID[node] >= 0) {
-    /* We are, so zero the count for the old cluster. */
-    cluster_count[ gcID[node] ] = 0;
-    merge_cluster(gcID[node], cid);
-    return;
-  }
-
-  /* Update for this node */
-  gcID[node] = cid;
-  cluster_count[cid]++;
-
-  /* Recursively search the child nodes */
-  for(i=0; i<ndim; ++i)
-    if(Gr[node][i] >= 0)      /* This is a child node */
-      depth_first(Gr[node][i]);
-}
-
-void bail(const char *msg)
-{
-  fprintf(stderr,"%s",msg);
-  abort();
-}
-
-
-static void check_clusters(int *gcID, int ndense)
-{
-  int i;
-
-  for(i=0; i<ndense; ++i)
-    if(gcID[i] < 0) {
-      fprintf(stderr,"faulty cluster id at i= %d\n",i);
-      abort();
-    }
-}
-
-static void merge_cluster(int from, int into)
-{
-  int i;
-
-  for(i=0; i<ndense; ++i)
-    if(gcID[i] == from)
-      gcID[i] = into;
-}

diff -r 7eab80f86779 -r 311a4f16c483 src/flock1.c
--- a/src/flock1.c Mon Feb 27 13:26:09 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000

[

b'@@ -1,2400 +0,0 @@\n-/*****************************************************************************\n-\t\n-\tFLOCK: FLOw cytometry Clustering without K (Named by: Jamie A. Lee and Richard H. Scheuermann) \n-\t\n-\tAuthor: (Max) Yu Qian, Ph.D.\n-\t\n-\tCopyright: Scheuermann Lab, Dept. of Pathology, UTSW\n-\t\n-\tDevelopment: November 2005 ~ Forever\n-\n-\tAlgorithm Status: May 2007: Release 1.0\n-\n-\tUsage: flock data_file\n-\t\t Note: the input file format must be channel values and the delimiter between two values must be a tab.\n-\n- Changes made July 23, 2010: made errors to STDERR\n-\tChanges made Nov 4, 2010: added one more error (select_num_bin<min_grid) || (select_num_bin>max_grid) to STDERR;\n-\t MAX_GRID changed to 50 as larger than 50 seems not useful for any file we have got\n-\t\n-******************************************************************************/\n-#include <time.h>\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <math.h>\n-#include <string.h>\n-#include <sys/stat.h>\n-#include <unistd.h>\n-#include <assert.h>\n-\n-\n-#define DEBUG 0\n-#define LINE_LEN 1024\n-#define FILE_NAME_LEN 128\n-#define PARA_NAME_LEN 64\n-#define MAX_VALUE 1000000000\n-#define MIN_GRID 6\n-#define MAX_GRID 50\n-\n-#define NORM_METHOD 2 //2 if z-score; 0 if no normalization; 1 if min-max \n-#define KMEANS_TERM 100\n-//#define MAX_NUM_POP 30\n-\n-\n-int find_connected(int **G, int num_dense_grids, int ndim, int *grid_clusterID);\n-\n-/************* Read basic info of the source file ****************************/\n-void getfileinfo(FILE *f_src, int *file_Len, int *num_dm, char *name_string, int *time_ID)\n-{\n- char src[LINE_LEN];\n- char current_name[64];\n- char prv;\n-\n- int num_rows=0;\n- int num_columns=0;\n- int ch=\'\\n\';\n- int prev=\'\\n\';\n- int time_pos=0;\n- int i=0;\n- int j=0;\n- int sw=0;\n-\n- src[0]=\'\\0\';\n- fgets(src, LINE_LEN, f_src);\n-\n- if ((src[0]==\'F\') && (src[1]==\'C\') && (src[2]==\'S\'))\n-\t{\n-\t\tfprintf(stderr,"the correct input format is a tab-delimited txt file, instead of FCS file.\\n");\n-\t\tabort();\n-\t}\n-\n- name_string[0]=\'\\0\';\n- current_name[0]=\'\\0\';\n- prv=\'\\n\';\n-\n- // skip space and tab characters\n- while ((src[i]==\' \') || (src[i]==\'\\t\'))\n- i++;\n-\n- // repeat until the end of line is reached\n- while ((src[i]!=\'\\0\') && (src[i]!=\'\\n\') && (src[i]!=\'\\r\'))\n- {\n- current_name[j]=src[i];\n-\t\t\n- if ((src[i]==\'\\t\') && (prv!=\'\\t\')) //a complete word\n- {\n- current_name[j]=\'\\0\';\n-\t\t\t\n- if (0!=strcmp(current_name,"Time"))\n- {\n- num_columns++; //num_columns does not inlcude the column of Time\n- time_pos++;\n- if (sw) {\n- strcat(name_string,"\\t");\n- }\n- strcat(name_string,current_name); \n- sw = 1;\n- }\n- else\n- {\n- *time_ID=time_pos;\n- }\n- \n- \n- current_name[0]=\'\\0\';\n- j=0;\t\t\t\n- }\t\t\n-\t\t\n- if ((src[i]==\'\\t\') && (prv==\'\\t\')) //a duplicate tab or space\n- {\n- current_name[0]=\'\\0\';\n- j=0;\n- }\n-\t\t\n- if (src[i]!=\'\\t\')\n- j++;\n-\t\t\n- prv=src[i];\n- i++;\n- }\n-\t\n- if (prv!=\'\\t\') //the last one hasn\'t been retrieved\n- {\n- current_name[j]=\'\\0\';\n- \n- if (0!=strcmp(current_name,"Time"))\n- {\n- num_columns++;\n- strcat(name_string,"\\t");\n- strcat(name_string,current_name);\n- time_pos++;\n- }\n- else\n- {\n- *time_ID=time_pos;\n- }\n- \n- \n- }\n- if (DEBUG==1)\n- {\n- printf("time_ID is %d\\n",*time_ID);\n- printf("name_string is %s\\n",name_string);\n- }\n-\n- //start computing # of rows\n-\n- while ((ch = fgetc(f_src))!= EOF )\n- {\n- if (ch == \'\\n\')\n- {\n- ++num_rows;\n- }\n- prev = ch;\n- }\n- if (prev!=\'\\n\')\n- ++num_rows;\n-\n- //added on July 23, 2010\n- if (num_rows<50)\n- {\n- fprintf(stderr,"Number of events '..b'lation_ID=(int*)malloc(sizeof(int)*file_Len);\n- memset(all_population_ID,0,sizeof(int)*file_Len);\n-\n- kmeans(normalized_data, num_population, KMEANS_TERM, file_Len, num_dm, all_population_ID, population_center);\n- show(input_data, all_population_ID, file_Len, num_population, num_dm, para_name_string);\n-\n- ID2Center_all(input_data,file_Len,num_dm,num_population,all_population_ID,population_center);\n- \n-\n- f_cid=fopen("population_id.txt","w");\n- f_ctr=fopen("population_center.txt","w");\n- f_out=fopen("coordinates.txt","w");\n- f_results=fopen("flock_results.txt","w");\n-\n-/*\n- f_parameters=fopen("parameters.txt","w");\n- fprintf(f_parameters,"Number_of_Bins\\t%d\\n",num_bin);\n- fprintf(f_parameters,"Density\\t%f\\n",aver_index);\n- fclose(f_parameters);\n-*/\n-\n- for (i=0;i<file_Len;i++)\n-\tfprintf(f_cid,"%d\\n",all_population_ID[i]+1); //all_population_ID[i] changed to all_population_ID[i]+1 to start from 1 instead of 0: April 16, 2009\n-\n- /*\n- * New to check for min/max to add to parameters.txt\n- *\n- */\n- \n- fprintf(f_out,"%s\\n",para_name_string);\n- //fprintf(f_results,"%s\\tEvent\\tPopulation\\n",para_name_string);\n- fprintf(f_results,"%s\\tPopulation\\n",para_name_string);\n- for (i=0;i<file_Len;i++)\n- {\n-\tfor (j=0;j<num_dm;j++)\n-\t{\n-\t\tif (input_data[i][j] < min) {\n-\t\t\tmin = (int)input_data[i][j];\n-\t\t}\n-\t\tif (input_data[i][j] > max) {\n-\t\t\tmax = (int)input_data[i][j];\n-\t\t}\n-\t\tif (j==num_dm-1)\n-\t\t{\n-\t\t\tfprintf(f_out,"%d\\n",(int)input_data[i][j]);\n-\t\t\tfprintf(f_results,"%d\\t",(int)input_data[i][j]);\n-\t\t}\n-\t\telse\n-\t\t{\n-\t\t\tfprintf(f_out,"%d\\t",(int)input_data[i][j]);\n-\t\t\tfprintf(f_results,"%d\\t",(int)input_data[i][j]);\n-\t\t}\n-\t}\n-\t//fprintf(f_results,"%d\\t",i + 1);\n-\tfprintf(f_results,"%d\\n",all_population_ID[i]+1); //all_population_ID[i] changed to all_population_ID[i]+1 to start from 1 instead of 0: April 16, 2009\n- }\n-\n-/*\n- f_parameters=fopen("parameters.txt","w");\n- fprintf(f_parameters,"Number_of_Bins\\t%d\\n",num_bin);\n- fprintf(f_parameters,"Density\\t%d\\n",den_t_event);\n- fprintf(f_parameters,"Min\\t%d\\n",min);\n- fprintf(f_parameters,"Max\\t%d\\n",max);\n- fclose(f_parameters);\n-*/\n-\n- f_properties=fopen("fcs.properties","w");\n- fprintf(f_properties,"Bins=%d\\n",num_bin);\n- fprintf(f_properties,"Density=%d\\n",den_t_event);\n- fprintf(f_properties,"Min=%d\\n",min);\n- fprintf(f_properties,"Max=%d\\n",max);\n- fprintf(f_properties,"Populations=%d\\n",num_population);\n- fprintf(f_properties,"Events=%d\\n",file_Len);\n- fprintf(f_properties,"Markers=%d\\n",num_dm);\n- fclose(f_properties);\n-\n-\n- for (i=0;i<num_population;i++) {\n-\t/* Add if we want to include population id in the output\n-\t*/\n-\tfprintf(f_ctr,"%d\\t",i+1); //i changed to i+1 to start from 1 instead of 0: April 16, 2009\n-\n-\tfor (j=0;j<num_dm;j++) {\n-\t\tif (j==num_dm-1)\n-\t\t\tfprintf(f_ctr,"%.0f\\n",population_center[i][j]);\n-\t\telse\n-\t\t\tfprintf(f_ctr,"%.0f\\t",population_center[i][j]);\n-\t}\n- }\n-\n- \t//added April 16, 2009\n-\tf_mfi=fopen("MFI.txt","w");\n-\n-\tfor (i=0;i<num_population;i++)\n-\t{\n-\t\tfprintf(f_mfi,"%d\\t",i+1);\n-\n-\t\tfor (j=0;j<num_dm;j++)\n-\t\t{\n-\t\t\tif (j==num_dm-1)\n-\t\t\t\tfprintf(f_mfi,"%.0f\\n",population_center[i][j]);\n-\t\t\telse\n-\t\t\t\tfprintf(f_mfi,"%.0f\\t",population_center[i][j]);\n-\t\t}\n-\t}\n-\tfclose(f_mfi);\n-\n-\t//ended April 16, 2009\n-\t\t\t\n- fclose(f_cid);\n- fclose(f_ctr);\n- fclose(f_out);\n- fclose(f_results);\n-\n-\n- for (i=0;i<num_population;i++)\n- {\n-\tfree(population_center[i]);\n- }\n- free(population_center);\n- \n-\n- for (i=0;i<file_Len;i++)\n- free(normalized_data[i]);\n- free(normalized_data);\t\n-\t\n- free(grid_populationID);\n-\n- free(cluster_populationID);\n- free(grid_clusterID);\n- free(cluster_ID);\n-\n- for (i=0;i<file_Len;i++)\n- free(input_data[i]);\n- free(input_data);\n-\n- free(grid_ID);\n- free(population_ID);\n- free(all_population_ID);\n- free(eventID_To_denseventID);\n-\t\t\n- ///////////////////////////////////////////////////////////\n- printf("Ending time:\\t\\t\\t\\t");\n- fflush(stdout);\n- system("/bin/date");\n-\n- return 0;\n-\n-}\n'