Revision | 37139e651931737bdbc0cd56fdd463686b017ce7 (tree) |
---|---|
Time | 2014-02-11 05:34:40 |
Author | Lorenzo Isella <lorenzo.isella@gmai...> |
Committer | Lorenzo Isella |
I modified the code to run the random forest for the Kaggle loan competition.
@@ -35,7 +35,10 @@ | ||
35 | 35 | # loss = n.genfromtxt('loss_data.dat',dtype=float) |
36 | 36 | |
37 | 37 | |
38 | -train = pd.read_csv('train_data_fixed.csv') | |
38 | +# train = pd.read_csv('train_data_fixed.csv') | |
39 | + | |
40 | + | |
41 | +train = n.loadtxt('train_data_fixed_no_header.dat',dtype="float") | |
39 | 42 | |
40 | 43 | #but I still use numpy for this one as it will return the right 1D array |
41 | 44 |
@@ -48,7 +51,16 @@ | ||
48 | 51 | # I am now already cleaning and scaling the data in an R code, so I do not need |
49 | 52 | # to redo this now |
50 | 53 | |
51 | -# train_balance = s.mean(train, axis=0) | |
54 | +# train_mean = s.mean(train, axis=0) | |
55 | + | |
56 | +# train_std= s.std(train, axis=0) | |
57 | + | |
58 | +# train=train-train_mean | |
59 | +# train=train/train_std | |
60 | + | |
61 | +# n.savetxt("train_mean.dat", train_mean) | |
62 | +# n.savetxt("train_std.dat", train_std) | |
63 | + | |
52 | 64 | |
53 | 65 | # print "s.shape(train_balance) is, " |
54 | 66 |