# Multivariate analysis walkthrough: correspondence analysis of the
# `housetasks` table, then a classification tree and a random forest on
# `iris`.
#
# This file was originally a pasted console session followed by the same
# commands as a script, collapsed onto two physical lines. It has been
# re-expanded to one statement per line so it can be run with `Rscript` or
# `source()`. Output captured in the original session is kept below each
# command as `#>` comments.
#
# Changes relative to the recorded session:
#   * `library(..., pos = NN)` arguments were dropped; they hard-coded
#     search-path positions from the recording session.
#   * `attach(housetasks)` was removed: `CA()` is given the data frame
#     directly and nothing referred to the attached columns.
#   * The recorded session also ran `library(MASS)`; the script section never
#     did and none of the calls below are MASS functions, so it is not loaded.
#   * Bare top-level expressions are wrapped in `print()` so their output is
#     also shown when the file is run through `source()`.
#   * A seed is set before the random forest so reruns give the same forest.

# Packages ----

library(ggplot2)
library(factoextra)
library(FactoMineR)
library(tree)
library(randomForest)

# Correspondence analysis ----

data(housetasks, package = "factoextra")

# Fit the correspondence analysis on the housetasks table.
house1 <- CA(housetasks)

# Plot the eigenvalues of the fitted dimensions.
print(fviz_eig(house1))

# Eigenvalue, percentage of variance and cumulative percentage per dimension.
print(get_eigenvalue(house1))
#>       eigenvalue variance.percent cumulative.variance.percent
#> Dim.1  0.5428893         48.69222                    48.69222
#> Dim.2  0.4450028         39.91269                    88.60491
#> Dim.3  0.1270484         11.39509                   100.00000

# Tree-based modelling ----

print(summary(iris))
#>   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width
#>  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100
#>  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300
#>  Median :5.800   Median :3.000   Median :4.350   Median :1.300
#>  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199
#>  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800
#>  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500
#>        Species
#>  setosa    :50
#>  versicolor:50
#>  virginica :50

# Classification tree predicting Species from the four measurements.
mytree <- tree(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris
)

# Draw the tree, then add the split labels on top of the plot.
plot(mytree)
text(mytree)

print(summary(mytree))
#> Classification tree:
#> tree(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length +
#>     Petal.Width, data = iris)
#> Variables actually used in tree construction:
#> [1] "Petal.Length" "Petal.Width"  "Sepal.Length"
#> Number of terminal nodes:  6
#> Residual mean deviance:  0.1253 = 18.05 / 144
#> Misclassification error rate: 0.02667 = 4 / 150

print(mytree)
#> node), split, n, deviance, yval, (yprob)
#>       * denotes terminal node
#>
#>  1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )
#>    2) Petal.Length < 2.45 50   0.000 setosa ( 1.00000 0.00000 0.00000 ) *
#>    3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )
#>      6) Petal.Width < 1.75 54  33.320 versicolor ( 0.00000 0.90741 0.09259 )
#>       12) Petal.Length < 4.95 48   9.721 versicolor ( 0.00000 0.97917 0.02083 )
#>         24) Sepal.Length < 5.15 5   5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
#>         25) Sepal.Length > 5.15 43   0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
#>       13) Petal.Length > 4.95 6   7.638 virginica ( 0.00000 0.33333 0.66667 ) *
#>      7) Petal.Width > 1.75 46   9.635 virginica ( 0.00000 0.02174 0.97826 )
#>       14) Petal.Length < 4.95 6   5.407 virginica ( 0.00000 0.16667 0.83333 ) *
#>       15) Petal.Length > 4.95 40   0.000 virginica ( 0.00000 0.00000 1.00000 ) *

# Random forest ----

# The forest is built from random resamples, so fix the RNG state to make
# reruns repeatable. The output recorded below came from the original
# unseeded session, so the exact figures from a seeded run may differ.
set.seed(123)

iris.forest <- randomForest(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
  data = iris,
  importance = TRUE,
  proximity = TRUE
)

print(iris.forest)
#> Call:
#>  randomForest(formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data = iris, importance = TRUE, proximity = TRUE)
#>                Type of random forest: classification
#>                      Number of trees: 500
#> No. of variables tried at each split: 2
#>
#>         OOB estimate of  error rate: 4.67%
#> Confusion matrix:
#>            setosa versicolor virginica class.error
#> setosa         50          0         0        0.00
#> versicolor      0         47         3        0.06
#> virginica       0          4        46        0.08

# Per-class and overall variable importance (available because the forest
# was fitted with importance = TRUE).
print(importance(iris.forest))
#>                 setosa versicolor virginica MeanDecreaseAccuracy MeanDecreaseGini
#> Sepal.Length  6.608420   8.721397  9.346867            12.092858        10.393502
#> Sepal.Width   5.059958   1.347413  5.463243             6.113979         2.324208
#> Petal.Length 22.151979  34.858941 27.287901            34.162053        42.093222
#> Petal.Width  22.205381  32.496050 30.104606            32.602838        44.494161

plot(iris.forest)