# Data-management walkthrough extracted from an R Markdown document with
# knitr::purl(). Lines starting with "## ----" are the chunk headers of the
# source document; chunks with eval = FALSE are kept as "## " comments and are
# illustrative only (they reference placeholder objects such as `data`).
#
# The live code reads two Stata files (cses_work.dta, vdem_work.dta), selects
# and filters the survey variables, merges the two data sets on Country/Year,
# summarises by country, and reshapes to long format.

## ----setup, echo = TRUE, include=FALSE-----------------------------------
knitr::opts_chunk$set(echo = FALSE)

## ----cars, echo = TRUE, eval = FALSE-------------------------------------
## install.packages("tidyverse")
## install.packages("dplyr")
##
## ---- eval = FALSE, echo = TRUE------------------------------------------
## names(data) <- c("new_name", "another_new_name")
## colnames(data)[colnames(data)=="old_name"] <- "new_name"
##
## ---- echo = TRUE, eval = FALSE------------------------------------------
## rename(data, new_name = old_name)
## select(data, variable = starts_with('S')) #rename
##                                           #columns in a group

## ---- echo = TRUE, eval = FALSE------------------------------------------
## data$Variable[data$Variable== 1] <- "Value"
## data$Variable[data$Variable > 1] <- "Value2"
##
## recode(Variable, `1` = "Value", .default = "Value2") #dplyr

# NOTE(review): the two lines below are not R; they look like the Stata
# equivalent of the recode above -- confirm against the source document.
## replace Variable = "Value" if Variable == 1
## replace Variable = "Value2" if Variable > 1
##
## ---- echo = TRUE, eval = FALSE------------------------------------------
## data$Variable[data$Variable < 3 &
##                 data$Variable2 == "True"] <- NA
##
## ---- echo = TRUE, eval = FALSE------------------------------------------
## # create new dataset without missing data
## newdata <- na.omit(data)
##
## # list rows of data that have missing values
## mydata[!complete.cases(mydata),]

## ---- echo = TRUE, eval = FALSE------------------------------------------
## data$Var_log <- log(data$Variable)

# NOTE(review): not R; presumably the Stata equivalent of the line above.
## gen Var_log = log(Variable)

## ---- echo = TRUE, eval = FALSE------------------------------------------
## newdata <- mydata[!mpg]

# NOTE(review): not R; presumably the Stata equivalents.
## drop if mpg > 21
## drop mpg

## ---- echo = FALSE, message=FALSE, eval = TRUE---------------------------
# NOTE(review): rm(list = ls()) and setwd() are kept because the relative
# file paths below depend on the working directory, but both make the script
# non-portable; consider project-relative paths instead.
rm(list = ls())
library(dplyr)
library(tidyverse)
setwd("~/Dropbox/Workshops/Teaching R/")

## ---- echo = TRUE, message=FALSE, eval = TRUE----------------------------
library(haven)
cses_work <- read_dta("cses_work.dta")

## ---- echo = TRUE, message=FALSE, eval = TRUE----------------------------
ncol(cses_work)

## ---- echo = TRUE, message=FALSE, eval = TRUE----------------------------
# Keep and rename the variables of interest, then drop respondents with
# missing turnout, ideology, or age.
cses_org <- cses_work %>%
  dplyr::select(
    Country = IMD1006_NAM,
    Country_year = IMD1004,
    Year = IMD1008_YEAR,
    Age = IMD2001_1,      # in years
    Female = IMD2002,     # 1 is male, 2 is female
    Education = IMD2003,
    Income = IMD2006,
    Urban = IMD2007,
    Turnout = IMD3001,
    Ideology = IMD3006,
    Identifier = IMD3005_1
  ) %>%
  drop_na(Turnout, Ideology, Age)

## ---- echo = TRUE, message=FALSE, eval = FALSE---------------------------
## head(cses_org)

## ---- echo = FALSE, message=FALSE, eval = TRUE---------------------------
library(kableExtra)
kable(head(cses_org[, 1:6])) %>%
  kable_styling(position = "center")

## ---- echo = TRUE, message=FALSE, eval = FALSE---------------------------
## tail(cses_org)

## ---- echo = FALSE, message=FALSE, eval = TRUE---------------------------
library(kableExtra)
kable(tail(cses_org[, 1:6])) %>%
  kable_styling(position = "center")

## ---- echo = TRUE, message=FALSE, eval = FALSE---------------------------
## str(cses_org)

## ---- echo = TRUE, message=FALSE, eval = FALSE---------------------------
## summary(cses_org)

## ---- eval = FALSE, echo= TRUE-------------------------------------------
##
## cses_new <- cses_org[which(cses_org$Female==1
##                            & cses_org$Age > 25), -which(names(cses_org) %in%
##                            c("Ideology", "Identifier"))]

## ---- eval = FALSE, echo= TRUE-------------------------------------------
## cses_new <- subset(cses_org, Female==1 & Age > 25,
##                    select=Country:Turnout)

## ---- echo = TRUE, message=FALSE, eval = TRUE----------------------------
# Keep rows with Female coded 1 or 2 (values above 2 are presumably
# missing/refused codes -- confirm against the codebook), shift the coding so
# that 0 = male and 1 = female, keep years after 2005, and drop countries
# whose name contains "South".
cses_org <- cses_org %>%
  filter(Female <= 2) %>%
  mutate(Female = Female - 1) %>%
  filter(Year > 2005) %>%
  filter(!grepl("South", Country))

## ---- echo = TRUE, message=FALSE, eval = TRUE----------------------------
# Compare the raw coding with the recoded variable.
unique(cses_work$IMD2002)
unique(cses_org$Female)

## ---- echo = FALSE, message=FALSE, eval = TRUE---------------------------
kable(head(cses_org[, 1:6])) %>%
  kable_styling(position = "center")

## ---- echo = FALSE, message=FALSE, eval = TRUE---------------------------
kable(tail(cses_org[, 1:5])) %>%
  kable_styling(position = "center")

## ----echo = TRUE, eval = TRUE--------------------------------------------
vdem_work <- read_dta("vdem_work.dta")

## ---- echo = TRUE, eval = TRUE-------------------------------------------
# Left join: keep every survey respondent even when there is no matching
# country-year in vdem_work. base::merge() has no `type`/`match` arguments
# (those belong to plyr::join() and were silently ignored, yielding an inner
# join); `all.x = TRUE` is the base-R spelling of a left join.
cses_merge <- merge(
  cses_org,
  vdem_work,
  by = c("Country", "Year"),
  all.x = TRUE
)

## ---- echo = FALSE, eval = TRUE------------------------------------------
kable(head(cses_merge[, c(1:2, 4:5, 12, 15)])) %>%
  kable_styling(position = "center")

## ---- echo = TRUE--------------------------------------------------------
unique(cses_merge$Turnout)

## ---- echo = TRUE, eval = TRUE-------------------------------------------
# Treat turnout codes above 1 as missing (presumably non-response codes --
# confirm against the codebook).
cses_merge$Turnout[cses_merge$Turnout > 1] <- NA

## ---- echo = TRUE, eval = TRUE-------------------------------------------
# Country-level means of turnout and the female indicator, ignoring NAs.
cses_summary <- cses_merge %>%
  group_by(Country) %>%
  summarise(
    Turnout.mean = mean(Turnout, na.rm = TRUE),
    Female.mean = mean(Female, na.rm = TRUE)
  )

## ---- echo = FALSE, eval = TRUE------------------------------------------
kable(cses_summary[c(1:6, 42:45), ]) %>%
  kable_styling(position = "center")

## ---- echo = FALSE, eval = TRUE, message=FALSE---------------------------
library(reshape)

## ---- echo = TRUE, eval = TRUE, message=FALSE, warning=FALSE-------------
# Reshape to long format: one row per Country/Year/variable combination.
melt.data <- melt(cses_merge, id = c("Country", "Year"))

## ---- echo = FALSE, eval = TRUE------------------------------------------
kable(melt.data[c(1, 5000, 15000), ]) %>%
  kable_styling(position = "center")

## ---- echo = TRUE, eval = FALSE------------------------------------------
## # cast(data, formula, function)
## Country.means <- cast(melt.data, Country~variable, mean)
## Year.means <- cast(melt.data, Year~variable, mean)
##
## ---- eval = FALSE, echo = FALSE-----------------------------------------
##
## library(knitr)
##
## purl("~/Dropbox/Workshops/Teaching R/Data Managment R.rmd", output = "~/Dropbox/Workshops/Teaching R/Data_Managment_R_files/data-manage_Jan_20_2020.R")
##