/******************************************************************/
/* Biclustering Algorithm Using Dependent Dirichlet Process Prior */
/*                    and Equi-Energy Sampler                     */
/******************************************************************/
/*  Authors: Hyungwon Choi, Sinae Kim, and Alexey Nesvizhskii     */
/******************************************************************/

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <math.h>
#include <time.h>
#include <sys/stat.h>
#include <float.h>
#include <ctype.h>
#include <assert.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_rng.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_randist.h>
#include <gsl/gsl_vector.h> 
#include <gsl/gsl_matrix.h>
#include <gsl/gsl_sort.h>
#include <gsl/gsl_sort_vector.h>
#include <gsl/gsl_linalg.h>
#include <gsl/gsl_sf_gamma.h>

/* Compile-time constants.
   _SIZE_RING_, _NUMLADDER_ and _NUM_RECORDS_ size the fixed arrays inside
   ERING and MODEL below.  The notes on the remaining constants are inferred
   from their names only and should be confirmed against their uses.
   NOTE(review): identifiers that begin with an underscore followed by an
   uppercase letter are reserved for the implementation in C; consider
   renaming these once every use in the file can be updated together. */
#define _NBURN_ 5000          /* presumably the default number of burn-in iterations -- confirm */
#define _NITER_ 20000         /* presumably the default number of sampling iterations -- confirm */
#define _MAX_NAME_ 100        /* presumably the maximum length of a name string -- confirm */
#define _PRINT_FREQ_ 100      /* presumably the progress-report interval in iterations -- confirm */
#define _SKIP_ 100            /* presumably a thinning/skip interval -- confirm */

#define _DIM_ 2
/* Former compile-time hyperparameters, kept for reference.  MODEL has np, nb,
   lambda and nu fields and a readHyperParam() prototype exists, so these are
   presumably supplied at run time now -- confirm. */
/* #define _NP_ 15
#define _NB_ 200
#define _lambda_ 0.0
#define _nu_ 100.0 */

#define _MINVAR_ 1.0          /* presumably a lower bound on variances (cf. MODEL.minvar) -- confirm */

#define _SIZE_RING_ 500       /* number of slots in each energy ring (array size in ERING) */
#define _NUMLADDER_ 4         /* number of energy rings held in MODEL.ering / MODEL.eringNew */

#define _RING_SORT_CYCLE_ 1000  /* presumably how often the rings are re-sorted, in iterations -- confirm */
#define _NUM_RECORDS_ 10      /* number of optimal-clustering records kept in MODEL */

/* Data set container: dimensions, names, index maps and the data matrix.
   NOTE(review): the code that fills this struct is not visible in this part
   of the file; the field notes are inferred from names and types and should
   be confirmed against readData(), quantiles() and sortBaitIP(). */
typedef struct tagDATA {
  int nprey;               /* number of preys */
  int nbait;               /* number of baits */
  int nIP;                 /* number of IPs */
  double *q;               /* presumably quantile levels -- confirm */
  double **quantiles;      /* presumably quantile values, see quantiles() -- confirm */
  double *priorScale;
  char **preyName;
  char **baitName;  /* unique bait name now */
  char **IPName;

  int **b2ip;              /* presumably bait -> list of IP indices -- confirm */
  int *n_b2ip;             /* presumably the length of each b2ip[] list -- confirm */
  int *ip2b;               /* presumably IP -> bait index -- confirm */

  int *whichPreyIsBait;
  double **d;              /* presumably the prey x IP data matrix -- confirm */
  int **zero;              /* presumably flags for zero/missing entries of d -- confirm */
  double *norm;
  int *self;
  int *use;

  int *ninter;             /* presumably the length of each inter[] list -- confirm */
  int **inter;
} DATA;

typedef struct tagDPclust {
  double alpha;
  double *beta;
  int *z;                  /* z is the cluster indicator of bait j */
} DPclust;

/* State of the prey-level clustering (the "HDP" component of the sampler).
   NOTE(review): array dimensions are set where the memory is allocated
   (see the memoryHDPprior() prototype), which is outside this view. */
typedef struct tagHDPprior {
  double rho;
  double gamma;
  double *eta;             /* presumably updated together with pi, see updateEtaPi() -- confirm */
  double **pi;
  int **y;                 /* y is the assignment of \phi to \omega for prey i in cluster k */
  double **omega;          /* presumably one parameter vector per mixture component -- confirm */
} HDPprior;

/* One stored clustering record; MODEL keeps _NUM_RECORDS_ of these in its
   "Optimal Clustering" section. */
typedef struct tagOPTCL {
  int *z;                  /* presumably a copy of DPclust.z for this record -- confirm */
  int **y;                 /* presumably a copy of HDPprior.y for this record -- confirm */
  double ***phi;
} OPTCL;

/* One energy ring: three parallel fixed-size arrays with _SIZE_RING_ slots.
   Presumably slot i of energy[], dp[] and hdp[] together describe one stored
   sampler state -- confirm against copyToERING()/copyFromERING(). */
typedef struct tagERING {
  int count;               /* presumably the number of slots currently in use -- confirm */
  double energy[_SIZE_RING_];
  DPclust dp[_SIZE_RING_];
  HDPprior hdp[_SIZE_RING_];
} ERING;

/* Complete sampler state: dimensions, run lengths, hyperparameters, the
   current DP/HDP state, stored optimal clusterings and the energy rings.
   Note that 2 * _NUMLADDER_ ERING objects are embedded by value, each holding
   _SIZE_RING_ entries, so a MODEL object is large.
   NOTE(review): field notes marked "presumably" are inferred from names. */
typedef struct tagMODEL {
  int numLadder;           /* presumably the number of energy ladders in use -- confirm */
  int curLadder;           /* presumably the ladder currently being sampled -- confirm */
  int nprey;
  int nbait;
  int nIP;

  int nburn;               /* presumably the burn-in length (cf. _NBURN_) -- confirm */
  int niter;               /* presumably the number of iterations (cf. _NITER_) -- confirm */

  /* Hyperparameters; presumably set by readHyperParam() -- confirm */
  int np;
  int nb;
  double lambda;
  double nu;
  double a;
  double b;
  double minvar;

  /* Likelihood Scores to Track */
  double curlikHT;
  double curlik;

  /* Main Components in the Sampler */
  DPclust dp;
  HDPprior hdp;
  double *sigma2;

  /* Optimal Clustering */
  OPTCL opt[_NUM_RECORDS_];
  double optScores[_NUM_RECORDS_];
  int optCount;
  int reportOrder[_NUM_RECORDS_];

  /* Energy Rings */
  ERING ering[_NUMLADDER_];
  ERING eringNew[_NUMLADDER_];
  double *H;               /* presumably energy truncation levels per ladder -- confirm */
  double *T;               /* presumably temperatures per ladder -- confirm */
  double p_ee;             /* presumably the probability of attempting an equi-energy move -- confirm */
  int EEattempt;           /* presumably a counter of attempted equi-energy moves -- confirm */
  int EEaccept;            /* presumably a counter of accepted equi-energy moves -- confirm */
} MODEL;


/*************/
/* functions */
/*************/
/* Input parsing, hyperparameter input and output/report helpers.
   NOTE(review): only the prototypes are visible here; the grouping and any
   purpose implied by it are inferred from names and signatures. */
int nrow(FILE *fp);
int ncol(FILE *fp);
int newlinechar(char *buf, int k);
void readHyperParam(FILE *fpp, MODEL *model);
void makeOutHeader(FILE *fpout, DATA *data);
void writeOut(FILE *fpout, MODEL *model, DATA *data, const int iter, char *moveType);
void printSampler(MODEL *model, DATA *data, int iter);
void writeEE(FILE *fp, MODEL *model, DATA *data);
void writeERING(MODEL *model);
void printHDP(MODEL *model);
void printVAR(MODEL *model, DATA *data);
void sortBaitIP(DATA *data);

void shuffleOPTCL(MODEL *model, DATA *data);

/* Initialization of basic units */
/* The memory*() prototypes presumably allocate the arrays of a DPclust or
   HDPprior and the init*()/reset*() prototypes presumably fill them -- confirm
   against the definitions.  The meaning of the int return values of
   readData() and initModel() is not visible here. */
void quantiles(MODEL *model, DATA *data);
int readData(FILE *fp, MODEL *model, DATA *data);
void initDPclust(MODEL *model, DPclust *dp, DATA *data, const gsl_rng *r);
void initHDPprior(MODEL *model, HDPprior *hdp, DATA *data, const gsl_rng *r);
void memoryDPclust(MODEL *model, DPclust *dp, DATA *data);
void memoryHDPprior(MODEL *model, HDPprior *hdp, DATA *data);
void initOptimalCluster(MODEL *model, OPTCL *opt, DATA *data);
int initModel(MODEL *model, DATA *data, const gsl_rng *r);

void resetDPclust(MODEL *model, DPclust *dp, DATA *data, const gsl_rng *r);
void resetHDPprior(MODEL *model, HDPprior *hdp, DATA *data, const gsl_rng *r);
void resetMODEL(MODEL *model, DATA *data, const gsl_rng *r);


/* Free memory */
/* Presumably release the arrays owned by a DPclust / HDPprior; whether the
   struct itself is freed is not visible here -- confirm. */
void freeDP(DPclust *dp);
void freeHDP(MODEL *model, HDPprior *hdp, int nprey);

/* ERING management */
/* Set-up, energy-truncation handling, copying and tear-down of the energy
   rings stored in MODEL.ering / MODEL.eringNew.
   NOTE(review): only the prototypes are visible here; in particular the copy
   direction of copyERINGtoERING() (ering1 -> ering2 or the reverse) must be
   confirmed against its definition. */
void initERING(MODEL *model, ERING *ering, DATA *data);
void initEnergyTruncation(MODEL *model, DATA *data);
void setEnergyTruncation(MODEL *model, DATA *data);
void resetEnergyTruncation(MODEL *model, DATA *data, double *energy);
void rearrangeERING(MODEL *model, DATA *data);
void copyERINGtoERING(MODEL *model, ERING *ering1, ERING *ering2, DATA *data);
void resetERING(MODEL *model, DATA *data, const gsl_rng *r);
void freeERING(MODEL *model, ERING *ering, DATA *data);

/********/
/* MCMC */
/********/
/* Log-prior and log-likelihood evaluation.
   NOTE(review): only the prototypes are visible here.  Several likelihood
   functions take a gsl_rng, which suggests they may draw random numbers while
   evaluating -- confirm against the definitions. */
double logPrior_ij(MODEL *model, DATA *data, int i, int j);
double logPrior_Ki(MODEL *model, DATA *data, int i, int k);
double logPrior(MODEL *model, DATA *data);
double logPriorERING(MODEL *model, DATA *data, int ring, int index);

int findFirstOne(unsigned int *vec, int len);
double logGaussian(const double x, double *omega, double sigma2, const gsl_rng *r);
double loglik(MODEL *model, DATA *data, const gsl_rng *r);
double loglikBait(MODEL *model, DATA *data, int which, const gsl_rng *r);
double loglikBait_k(MODEL *model, DATA *data, int which, int k, const gsl_rng *r);
int loglikBaitSample(MODEL *model, DATA *data, int which, double p[], const gsl_rng *r, int *id);
double loglikEring(MODEL *model, DATA *data, int ring, int index, const gsl_rng *r);

/* Bait Clustering DP */
/* Update steps for the DPclust state (beta and z).  baitClusteringDP() is
   presumably the driver that calls the update*() steps -- confirm.  The
   meaning of the int return values is not visible here. */
int nclust(const int *x, const int n);
void printp(const double *p, const int n);
int updateBeta(MODEL *model, DATA *data, const gsl_rng *r);
int updateZ(MODEL *model, DATA *data, const gsl_rng *r);
int updateZ2(MODEL *model, DATA *data, const gsl_rng *r);
void baitClusteringDP(MODEL *model, DATA *data, const gsl_rng *r);

/* Prey Clustering HDP */
/* Base-measure helpers and update steps for the HDPprior state, plus the
   variance and missing-data steps.  preyClusteringHDP() and blockedGibbs()
   are presumably the drivers that call the individual steps -- confirm.
   The "mv" argument presumably carries a minimum variance (cf. MODEL.minvar)
   -- confirm. */
void sampleBase(const gsl_rng *r, DATA *data, double *omega, double lambda, double nu, double a, double b, double mv);
void updateBase(const gsl_rng *r, DATA *data, double *omega, double *propose, double a, double b, double mv);
double logBaseEvaluate(double *omega, double lambda, double nu, double a, double b);
int updateEtaPi(MODEL *model, DATA *data, const gsl_rng *r);
int updateY(MODEL *model, DATA *data, const gsl_rng *r);
int updateOmega(MODEL *model, DATA *data, const gsl_rng *r);
void preyClusteringHDP(MODEL *model, DATA *data, const gsl_rng *r);
void varSelect(MODEL *model, DATA *data, const gsl_rng *r);
void imputeMiss(MODEL *model, DATA *data, const gsl_rng *r);
void updateSigma2(MODEL *model, DATA *data, const gsl_rng *r);
void blockedGibbs(MODEL *model, DATA *data, const gsl_rng *r, int upvar);

/* Equi-Energy Sampler */
/* State copying between the current sampler state and the energy rings, and
   the equi-energy move itself.
   NOTE(review): the copy direction of copyDPclust()/copyHDPprior() (first
   argument to second, or the reverse) is not visible here -- confirm before
   relying on it. */
void copyDPclust(MODEL *model, DPclust *dp1, DPclust *dp2, int nbait);
void copyHDPprior(MODEL *model, HDPprior *hdp1, HDPprior *hdp2, int nprey);
int sampleIndex(const gsl_rng *r, int n);
void copyToERING(MODEL *model, DATA *data, const gsl_rng *r);
void copyFromERING(MODEL *model, DATA *data, int ring, int index);
void EEsampler(MODEL *model, DATA *data, const gsl_rng *r);


/***********************/
/* Posterior Inference */
/***********************/
void optimizeY(MODEL *model, DATA *data, int k, int *id, const gsl_rng *r);

/* String helpers.
   NOTE(review): some C libraries declare a non-standard itoa() with a
   different signature in <stdlib.h>; this declaration may conflict on such
   platforms -- confirm for the supported targets. */
void reverse(char s[]);
void itoa(int n, char s[]);

/* I/O facility and MCMC diagnostic */
void outputClustering(MODEL *model, DATA *data); /* including parameter history, cluster numbers */
void optimalCluster(MODEL *model, DATA *data, const gsl_rng *r);
void posteriorDistributions(MODEL *model, DATA *data); /* HDP */
double takeMean(const double *phi);

/* 
   TODO:
   1) functions are needed for sorting rows based on the nested clusters
   2) an object is needed to hold information for the optimal clustering -- see OPTCL above
*/


/* Distance Calculation */
/* Presumably compares the first len elements of x and y; the encoding of the
   int result is not visible here -- confirm. */
int isEqual(unsigned int *x, unsigned int *y, const int len);

/* Report */

/* Misc. Math functions */
/* Numeric checks and summary statistics over arrays of length len (or over
   an index range start..end), plus a multinomial draw.
   NOTE(review): only the prototypes are visible here.  Whether "end" is
   inclusive, what checknum()/checknums() return, and whether ranMultinom()
   expects p to be normalised must be confirmed against the definitions. */
int checknum(const double x);
int checknums(const double *x, int n);
double vec_sum(const double *vec, int len);
double vec_partial_sum(const double *vec, int start, int end);
double vec_max(const double *vec, int len);
double vec_max_cond(const double *vec, int len, int *id);
int vec_max_index(const double *vec, int len);
double vec_min(const double *vec, int len);
int vec_min_index(const double *vec, int len);
double vec_mean(const double *vec, int len);
double vec_var(const double *vec, int len);
double vec_med(const double *vec, int len);
double vec_mad(const double *vec, int len);
int vec_int_max(const int *vec, int len);
int vec_int_sum(const int *vec, int len);
int vec_int_partial_sum(const int *vec, int start, int end);
int ranMultinom(const gsl_rng *r, double *p, int K);

