R Regression: Multicollinearity Essentials and VIF
Revision as of 08:43, 2 December 2019 by Onnowpurbo (talk | contribs) (Created page with "# Ref: http://www.sthda.com/english/articles/39-regression-model-diagnostics/160-multicollinearity-essentials-and-vif-in-r/ library(tidyverse) library(caret) # Load the...")
# Ref: http://www.sthda.com/english/articles/39-regression-model-diagnostics/160-multicollinearity-essentials-and-vif-in-r/

# Attach packages, one library() call per line: two calls on the same line
# with no separator between them do not parse in R.
library(tidyverse)
library(caret)

# Load the data
data("Boston", package = "MASS")

# Split the data into training and test set.
# The seed is fixed so the random partition is the same on every run.
set.seed(123)
# Partition on the response (medv) with p = 0.8; list = FALSE so the result
# can be used directly as a row index into Boston below.
training.samples <- Boston$medv %>%
  createDataPartition(p = 0.8, list = FALSE)
# Rows selected by the partition form the training set; the negative index
# keeps every remaining row as the test set.
train.data <- Boston[training.samples, ]
test.data <- Boston[-training.samples, ]
# Build the model: regress medv on all other columns of train.data
model1 <- lm(medv ~ ., data = train.data)

# Make predictions on the test set
predictions <- model1 %>% predict(test.data)

# Model performance: compare predictions against the observed medv values
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)
# Detecting multicollinearity
# Print the variance inflation factors (VIF) reported by car::vif() for the
# full model; car is referenced by namespace, so it is not attached.
car::vif(model1)
# Dealing with multicollinearity
# Build a model excluding the tax variable: "- tax" drops that term from the
# "all other columns" formula.
model2 <- lm(medv ~ . - tax, data = train.data)

# Make predictions on the test set (overwrites the earlier predictions)
predictions <- model2 %>% predict(test.data)

# Model performance: same metrics as for the full model, for comparison
data.frame(
  RMSE = RMSE(predictions, test.data$medv),
  R2 = R2(predictions, test.data$medv)
)