import java.util.*;

/**
 * Command-line driver for the tree-learning experiments.
 *
 * <p>Two experiments are available, chosen by an optional first command-line
 * argument:
 * <ul>
 *   <li>{@code 0} - builds a {@link RegressionTree} on the housing training
 *       file and reports the MSE on the training and test files;</li>
 *   <li>any other value, or no argument (default {@code 1}) - runs a 10-fold
 *       cross validation of a {@link DecisionTree} on the spambase file and
 *       reports per-fold and average MSE.</li>
 * </ul>
 *
 * <p>NOTE(review): the dataset locations are absolute paths on the original
 * author's machine; they must be adjusted before running elsewhere.
 */
public class EntryPoint{

    //required objects (created in main before either experiment runs)
    static DataReader reader = null;
    static RegressionTree treeBuilder = null;
    static DecisionTree dtreeBuilder = null;

    //Housing dataset constants
    static final String HOUSING_DATASET_PATH = "/home/rahul/data/src/ml/dataset/hw1/housing/housing_train.txt";
    static final String HOUSING_TESTSET_PATH = "/home/rahul/data/src/ml/dataset/hw1/housing/housing_test.txt";
    static final int HOUSING_ROW_COUNT = 433;      // row count passed to the reader for the training file
    static final int HOUSING_TEST_ROW_COUNT = 74;  // row count passed to the reader for the test file
    static final int HOUSING_COL_COUNT = 14;
    static final String HOUSING_DATA_TYPE = "SSV";
    static final int[] HOUSING_NUMERIC_COL_IDXS = {0,1,2,4,5,6,7,8,9,10,11,12};// 0 based
    static final int[] HOUSING_NOMINAL_COL_IDXS = {3};// 0 based
    static final int HOUSING_LABEL_COL_IDX = 13;// 0 based

    //Spambase dataset constants
    static final String SPAMBASE_DATASET_PATH = "/home/rahul/data/src/ml/dataset/hw1/spam/spambase.data";
    static final int SPAMBASE_ROW_COUNT = 4601;
    static final int SPAMBASE_COL_COUNT = 58;
    static final String SPAMBASE_DATA_TYPE = "CSV";
    static final int[] SPAMBASE_NUMERIC_COL_IDXS = {0,1,2,3,4,5,6,7,8,9,10,
                                                    11,12,13,14,15,16,17,18,19,20,
                                                    21,22,23,24,25,26,27,28,29,30,
                                                    31,32,33,34,35,36,37,38,39,40,
                                                    41,42,43,44,45,46,47,48,49,50,
                                                    51,52,53,54,55,56};// 0 based
    static final int[] SPAMBASE_NOMINAL_COL_IDXS = {};// 0 based
    static final int SPAMBASE_LABEL_COL_IDX = 57;// 0 based

    /** Selector value that runs the housing regression experiment. */
    private static final int HOUSING_SELECTOR = 0;

    /** Selector used when no (or an unparsable) argument is supplied; runs the spambase experiment. */
    private static final int DEFAULT_SELECTOR = 1;

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the integer dataset selector
     *             ({@code 0} = housing, anything else = spambase). When absent
     *             the spambase experiment runs.
     * @throws Exception any failure raised while reading data, building the
     *                   model or predicting; it is reported on standard output
     *                   and then rethrown unchanged.
     */
    public static void main(String[] args) throws Exception {
        try {
            int dbSelector = parseSelector(args);
            reader = new DataReader();
            treeBuilder = new RegressionTree();
            dtreeBuilder = new DecisionTree();
            System.out.println("");
            DataProcessor processor = new DataProcessor();

            if (dbSelector == HOUSING_SELECTOR) {
                runHousingRegression(processor);
            } else {
                runSpambaseCrossValidation(processor);
            }

            System.out.println("");
            System.out.println("");
        } catch (Exception e) {
            System.out.println("Runtime Error occurred: ");
            System.out.println(e);
            throw e;
        }
    }

    /**
     * Reads the dataset selector from the command line.
     *
     * @param args the raw command-line arguments, may be {@code null} or empty
     * @return the parsed first argument, or {@link #DEFAULT_SELECTOR} when no
     *         argument is given or it is not a valid integer
     */
    private static int parseSelector(String[] args) {
        if (args == null || args.length == 0) {
            return DEFAULT_SELECTOR;
        }
        try {
            return Integer.parseInt(args[0]);
        } catch (NumberFormatException ignored) {
            // Deliberately non-fatal: an unrecognised selector falls back to the
            // experiment that ran when arguments were ignored altogether.
            System.out.println("Ignoring invalid dataset selector '" + args[0]
                               + "'; using default " + DEFAULT_SELECTOR);
            return DEFAULT_SELECTOR;
        }
    }

    /**
     * Builds a regression tree on the housing training file and logs the MSE
     * obtained on the training file and on the test file.
     *
     * @param processor helper used for normalisation, thresholds and split options
     * @throws Exception propagated from the reader, processor or tree builder
     */
    private static void runHousingRegression(DataProcessor processor) throws Exception {
        double[][] data = reader.readFile(HOUSING_DATASET_PATH,
                                          HOUSING_DATA_TYPE,
                                          HOUSING_ROW_COUNT,
                                          HOUSING_COL_COUNT);
        // The training file is read a second time so that prediction gets its own
        // copy. NOTE(review): presumably normalizeNumericCols modifies `data` in
        // place and this copy keeps the raw values - confirm against DataProcessor.
        double[][] trainSet = reader.readFile(HOUSING_DATASET_PATH,
                                              HOUSING_DATA_TYPE,
                                              HOUSING_ROW_COUNT,
                                              HOUSING_COL_COUNT);

        ArrayList<double[]> normArrays =
            processor.normalizeNumericCols(data, HOUSING_NUMERIC_COL_IDXS, 0);
        HashMap<Integer, double[]> ths =
            processor.getThresholds(data, HOUSING_NUMERIC_COL_IDXS);
        ArrayList<SplitOption> totalSO =
            processor.getSplitOptions(data,
                                      HOUSING_NUMERIC_COL_IDXS,
                                      HOUSING_NOMINAL_COL_IDXS, ths);
        System.out.println("Total split options: " + totalSO.size());

        // NOTE(review): the meaning of the literal 4 is defined by
        // RegressionTree.buildModel - confirm there.
        RegressionTree.Node modelRoot =
            treeBuilder.buildModel(data, 4, totalSO, HOUSING_LABEL_COL_IDX);
        modelRoot.printNodeAndSubTree();

        Log.write("Reading test dataset...");
        double[][] testdata = reader.readFile(HOUSING_TESTSET_PATH,
                                              HOUSING_DATA_TYPE,
                                              HOUSING_TEST_ROW_COUNT,
                                              HOUSING_COL_COUNT);

        // The two arrays produced by the normalisation step are handed to predict
        // for both the training and the test rows.
        double[][] preds = treeBuilder.predict(trainSet,
                                               HOUSING_LABEL_COL_IDX,
                                               HOUSING_NUMERIC_COL_IDXS,
                                               modelRoot,
                                               normArrays.get(0), normArrays.get(1));
        double MSE = treeBuilder.computeMSE(preds);
        Log.write("");
        Log.write("");
        Log.write("MSE for training set is: "+ MSE );

        preds = treeBuilder.predict(testdata,
                                    HOUSING_LABEL_COL_IDX,
                                    HOUSING_NUMERIC_COL_IDXS,
                                    modelRoot,
                                    normArrays.get(0), normArrays.get(1));
        MSE = treeBuilder.computeMSE(preds);
        Log.write("MSE for test set is: "+ MSE );
    }

    /**
     * Runs a 10-fold cross validation of the decision tree on the spambase file.
     *
     * <p>Folds are contiguous row ranges of {@code data.length / 10} rows; the
     * last fold additionally takes the remainder rows so that every row is
     * tested exactly once. No normalisation is applied on this path.
     *
     * @param processor helper used for splitting, thresholds and split options
     * @throws Exception propagated from the reader, processor or tree builder
     */
    private static void runSpambaseCrossValidation(DataProcessor processor) throws Exception {
        double[][] data = reader.readFile(SPAMBASE_DATASET_PATH,
                                          SPAMBASE_DATA_TYPE,
                                          SPAMBASE_ROW_COUNT,
                                          SPAMBASE_COL_COUNT);
        int foldCount = 10;
        int foldRecordCount = data.length / foldCount;
        double testMSESum = 0.0;
        double trainMSESum = 0.0;

        for (int k = 0; k < foldCount; k++) {
            int testStartIDx = foldRecordCount * k;
            // End index handed to splitTrainTest; the last fold runs to the end of
            // the data so the rows left over by the integer division are included.
            int testEndIDx = (k == foldCount - 1) ? data.length
                                                  : testStartIDx + foldRecordCount;
            Log.write("");
            Log.write("Test set Start Index: "+ testStartIDx);
            // Logged as the last included row for every fold (the last fold used to
            // print the one-past-the-end value instead).
            Log.write("Test set End Index: "+ (testEndIDx - 1));

            ArrayList<double[][]> folds = processor.splitTrainTest(data, testStartIDx, testEndIDx);
            double[][] trainset = folds.get(0);
            double[][] testset = folds.get(1);

            HashMap<Integer, double[]> ths =
                processor.getThresholds(trainset, SPAMBASE_NUMERIC_COL_IDXS);
            ArrayList<SplitOption> totalSO =
                processor.getSplitOptions(trainset,
                                          SPAMBASE_NUMERIC_COL_IDXS,
                                          SPAMBASE_NOMINAL_COL_IDXS, ths);
            System.out.println("Total split options: " + totalSO.size());

            // NOTE(review): the meaning of the literal 4 is defined by
            // DecisionTree.buildModel - confirm there.
            DecisionTree.Node modelRoot =
                dtreeBuilder.buildModel(trainset, 4, totalSO, SPAMBASE_LABEL_COL_IDX);
            modelRoot.printNodeAndSubTree();
            Log.write("");

            double trainMSE = evaluateSpambase(modelRoot, trainset, "training");
            double testMSE = evaluateSpambase(modelRoot, testset, "test");
            testMSESum += testMSE;   // sum test MSE for all folds
            trainMSESum += trainMSE; // sum train MSE for all folds
        }

        Log.write("");
        Log.write("");
        Log.write("Average test MSE of "+ foldCount + " runs is = "+ (testMSESum/ foldCount));
        Log.write("Average train MSE of "+ foldCount + " runs is = "+ (trainMSESum/ foldCount));
    }

    /**
     * Predicts one spambase row set with the given tree, emits its ROC
     * statistics and logs its MSE.
     *
     * @param modelRoot root of the trained decision tree
     * @param rows      rows to predict (label column included)
     * @param setName   word used in the log line, e.g. {@code "training"} or {@code "test"}
     * @return the MSE computed from the predictions
     * @throws Exception propagated from the tree builder or statistics calculator
     */
    private static double evaluateSpambase(DecisionTree.Node modelRoot,
                                           double[][] rows,
                                           String setName) throws Exception {
        // No normalisation arrays are supplied on the spambase path.
        double[][] preds = dtreeBuilder.predict(rows,
                                                SPAMBASE_LABEL_COL_IDX,
                                                SPAMBASE_NUMERIC_COL_IDXS,
                                                modelRoot, null, null);
        double mse = dtreeBuilder.computeMSE(preds);
        PredictorStatsCalculator.computeROCStats(preds,
                                                 new double[1],
                                                 new int[] {preds.length},
                                                 false, true, false);
        Log.write("MSE for " + setName + " set is: "+ mse );
        return mse;
    }
}
