## EPPS 6356 Assignment 4.R
## Min Shi
## Oct. 11th, 2021
# clear environment, set working directory and read the data
rm(list=ls())
setwd("/Users/min/Desktop/2021 Fall Semester/EPPS 6356 Data Visualization/Assignment 4/Results")
# load the libraries
library(aod)
library(ggplot2)
library(hrbrthemes) # for Chart 1 -- style of Variable Width Column Chart
library(dplyr) # for group_by function
library(reshape2) # for Chart 2 -- Table with Embedded Charts
library(viridis) # for Chart 3 -- Small multiple bar charts
library(hrbrthemes) # for Chart 3 -- Small multiple bar charts
library(qdap) # for Chart 4 -- character strip
# load the datasets
data1 <- read.csv("/Users/min/Desktop/2021 Fall Semester/EPPS 6356 Data Visualization/Assignment 4/population_by_country_2020.csv")
data2 <- read.csv("/Users/min/Desktop/2021 Fall Semester/EPPS 6356 Data Visualization/Assignment 4/world-happiness-report-2021.csv")
ls(data1)
## [1] "Country..or.dependency." "Density..P.Km.."
## [3] "Fert..Rate" "Land.Area..Km.."
## [5] "Med..Age" "Migrants..net."
## [7] "Net.Change" "Population..2020."
## [9] "Urban.Pop.." "World.Share"
## [11] "Yearly.Change"
ls(data2)
## [1] "Country.name"
## [2] "Dystopia...residual"
## [3] "Explained.by..Freedom.to.make.life.choices"
## [4] "Explained.by..Generosity"
## [5] "Explained.by..Healthy.life.expectancy"
## [6] "Explained.by..Log.GDP.per.capita"
## [7] "Explained.by..Perceptions.of.corruption"
## [8] "Explained.by..Social.support"
## [9] "Freedom.to.make.life.choices"
## [10] "Generosity"
## [11] "Healthy.life.expectancy"
## [12] "Ladder.score"
## [13] "Ladder.score.in.Dystopia"
## [14] "Logged.GDP.per.capita"
## [15] "lowerwhisker"
## [16] "Perceptions.of.corruption"
## [17] "Regional.indicator"
## [18] "Social.support"
## [19] "Standard.error.of.ladder.score"
## [20] "upperwhisker"
data1 <- rename(data1, Country = Country..or.dependency.)
data2 <- rename(data2, Country = Country.name)
mydata <- merge(data1, data2, by="Country")
#####################################
# Chart 1 Variable Width Column Chart
#####################################
# Sample 1
mydata <- rename(mydata, Region = Regional.indicator)
ls(mydata)
## [1] "Country"
## [2] "Density..P.Km.."
## [3] "Dystopia...residual"
## [4] "Explained.by..Freedom.to.make.life.choices"
## [5] "Explained.by..Generosity"
## [6] "Explained.by..Healthy.life.expectancy"
## [7] "Explained.by..Log.GDP.per.capita"
## [8] "Explained.by..Perceptions.of.corruption"
## [9] "Explained.by..Social.support"
## [10] "Fert..Rate"
## [11] "Freedom.to.make.life.choices"
## [12] "Generosity"
## [13] "Healthy.life.expectancy"
## [14] "Ladder.score"
## [15] "Ladder.score.in.Dystopia"
## [16] "Land.Area..Km.."
## [17] "Logged.GDP.per.capita"
## [18] "lowerwhisker"
## [19] "Med..Age"
## [20] "Migrants..net."
## [21] "Net.Change"
## [22] "Perceptions.of.corruption"
## [23] "Population..2020."
## [24] "Region"
## [25] "Social.support"
## [26] "Standard.error.of.ladder.score"
## [27] "upperwhisker"
## [28] "Urban.Pop.."
## [29] "World.Share"
## [30] "Yearly.Change"
mydata1_1 <- mydata %>% group_by(Region)
Ladder_by_region <- mydata1_1 %>% summarise(
Ladder_score = mean(Ladder.score)
)
Number_of_states <- count(mydata1_1, 'Region')
Chart1_data <- merge(Ladder_by_region, Number_of_states, by="Region")
Chart1_data <- rename(Chart1_data, Number_of_states = n)
# Calculate the future positions on the x axis of each bar (left border, central position, right border)
Chart1_data$w <- cumsum(Chart1_data$Number_of_states)
Chart1_data$wm <- Chart1_data$w - Chart1_data$Number_of_states
Chart1_data$wt <- with(Chart1_data, wm + (w-wm)/2)
# Plot
ggplot(Chart1_data, aes(ymin = 0)) +
geom_rect(aes(xmin = wm, xmax = w, ymax = Ladder_score, colour = Region, fill = Region)) +
theme_bw() +
labs(x = NULL, y = "Ladder Score of Happiness",
title = "Variable Width Column Chart for World Happiness Score in 2021") +
theme_ipsum() +
theme(legend.position="right") +
theme(axis.text.x = element_blank())
###########
# Sample 2
###########
mydata <- rename(mydata, Population = Population..2020.)
mydata1_2 <- mydata %>% group_by(Region)
Population_by_region <- mydata1_2 %>% summarise(
Population = mean(Population)
)
Chart1_data2 <- merge(Population_by_region, Number_of_states, by= "Region")
Chart1_data2 <- rename(Chart1_data2, Number_of_states = n)
# Calculate the future positions on the x axis of each bar (left border, central position, right border)
Chart1_data2$w <- cumsum(Chart1_data2$Number_of_states)
Chart1_data2$wm <- Chart1_data2$w - Chart1_data2$Number_of_states
Chart1_data2$wt <- with(Chart1_data2, wm + (w-wm)/2)
# Plot
ggplot(Chart1_data2, aes(ymin = 0)) +
geom_rect(aes(xmin = wm, xmax = w, ymax = Population, colour = Region, fill = Region)) +
theme_bw() +
labs(x = NULL, y = "Average Population by Region",
title = "Variable Width Column Chart for Average Population in 2020") +
theme_ipsum() +
theme(legend.position="right") +
theme(axis.text.x = element_blank())
#####################################
# Chart 2 Table with Embeded Chart
#####################################
############
# Sample 1
############
data3 <- read.csv("/Users/min/Desktop/2021 Fall Semester/EPPS 6356 Data Visualization/Assignment 4/world-happiness-report.csv")
data3 <- rename(data3, Country = Country.name)
mydata2_1 <- subset(data3, year >= 2015)
region_data <- data2[, 1:2]
mydata2_1 <- merge(mydata2_1, region_data, by="Country")
mydata2_1 <- rename(mydata2_1, Region = Regional.indicator)
mydata2_1 <- aggregate(x = mydata2_1$Life.Ladder,
by = list(mydata2_1$Region, mydata2_1$year),
FUN=mean)
mydata2_1 <- rename(mydata2_1, Region = Group.1, year = Group.2, Ladder_by_region_year = x)
ggplot(mydata2_1, aes(Region, Ladder_by_region_year, fill=as.factor(year)), angle=45, size=16)+
geom_bar(position="dodge", stat="identity") + facet_wrap(~Region, nrow=3)+
labs(x = NULL, y = "Average Happiness Score by Region-year",
title = "Table with Embeded Chart for Average Happiness Score") +
theme_ipsum() +
theme(legend.position="right") +
theme(axis.text.x = element_blank())
############
# Sample 2
############
mydata2_2 <- merge(data3, region_data, by="Country")
mydata2_2 <- rename(mydata2_2, Region = Regional.indicator)
mydata2_2 <- aggregate(x = mydata2_2$Freedom.to.make.life.choices,
by = list(mydata2_2$Region, mydata2_2$year),
FUN=mean)
mydata2_2 <- rename(mydata2_2, Region = Group.1, year = Group.2, Freedom.to.make.life.choices = x)
ggplot(mydata2_2, aes(Region, Freedom.to.make.life.choices, fill=as.factor(year)), angle=45, size=16)+
geom_bar(position="dodge", stat="identity") + facet_wrap(~Region, nrow=3)+
labs(x = NULL, y = "Freedom to make life choices score",
title = "Table with Embeded Chart for Freedom of Life Choices") +
theme_ipsum() +
theme(legend.position="right") +
theme(axis.text.x = element_blank())
#####################################################
# Chart 3 Bar Charts with Many Items (Small Multiple)
#####################################################
############
# Sample 1
############
mydata3_1 <- merge(data3, region_data, by="Country")
mydata3_1 <- rename(mydata3_1, Region = Regional.indicator)
mydata3_1 <- aggregate(x = mydata3_1$Life.Ladder,
by = list(mydata3_1$Region, mydata3_1$year),
FUN = mean)
mydata3_1 <- rename(mydata3_1, Region = Group.1, year = Group.2, Ladder_by_region_year = x)
ggplot(mydata3_1, aes(fill = Region, y = Ladder_by_region_year, x = Region)) +
geom_bar(position="dodge", stat="identity") +
scale_fill_viridis(discrete = T, option = "E") +
labs(x = NULL, y = "Average Happiness Score by Region",
title = "Small Multiple Bar Chart for Happiness (2015-2021)") +
facet_wrap(~year) +
theme_ipsum() +
theme(legend.position="right") +
theme(axis.text.x = element_blank())
############
# Sample 2
############
mydata3_2 <- merge(data3, region_data, by="Country")
mydata3_2 <- rename(mydata3_2, Region = Regional.indicator)
## Filling missing values
# country mean imputation
meanvals <- aggregate(mydata3_2$Log.GDP.per.capita, by=list(mydata3_2$Country), FUN="mean", na.rm=TRUE, na.action=NULL)
colnames(meanvals) <- c("Country", "ave_log_GDP_per_capita")
mydata3_2 <- merge(x = mydata3_2, y=meanvals, all.x=TRUE, by="Country")
mydata3_2$ave_log_GDP_per_capita[which(!is.na(mydata3_2$Log.GDP.per.capita))] <- mydata3_2$Log.GDP.per.capita[which(!is.na(mydata3_2$Log.GDP.per.capita))]
# generate the Average Logarithm of GDP per capita by Region
mydata3_2 <- aggregate(x = mydata3_2$ave_log_GDP_per_capita,
by = list(mydata3_2$Region, mydata3_2$year),
FUN = mean, na.rm=TRUE, na.action=NULL)
mydata3_2 <- rename(mydata3_2, Region = Group.1, year = Group.2, Ave.Log.GDP.per.capita = x)
ggplot(mydata3_2, aes(fill = Region, y = Ave.Log.GDP.per.capita, x = Region)) +
geom_bar(position="dodge", stat="identity") +
scale_fill_viridis(discrete = T, option = "E") +
labs(x = NULL, y = "Average Logarithm of GDP per capita by Region",
title = "Small Multiple Bar Chart for GDP per capita (2015-2021)") +
facet_wrap(~year) +
theme_ipsum() +
theme(legend.position="right") +
theme(axis.text.x = element_blank())
#######################################
# Chart 4 Column Charts with Few Items
#######################################
############
# Sample 1
############
mydata4_1 <- subset(data3, year == 2005 | year == 2020)
region_data <- data2[, 1:2]
mydata4_1 <- merge(mydata4_1, region_data, by="Country")
mydata4_1 <- rename(mydata4_1, Region = Regional.indicator)
mydata4_1 <- aggregate(x = mydata4_1$Life.Ladder,
by = list(mydata4_1$Region, mydata4_1$year),
FUN="mean", na.rm=TRUE, na.action=NULL)
mydata4_1 <- rename(mydata4_1, Region = Group.1, year = Group.2, Ladder_by_region_year = x)
ggplot(data = mydata4_1, aes(factor(year), y = Ladder_by_region_year, fill = Region)) +
geom_bar(position="dodge", stat="identity")+
labs(x = NULL, y = "Average Happpiness Score by Region",
title = "Grouped Bar Chart for Happiness in 2005 & 2020") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position="right")
#theme(axis.text.x = element_blank())
############
# Sample 2
############
mydata4_2 <- subset(data3, year == 2005 | year == 2020)
region_data <- data2[, 1:2]
mydata4_2 <- merge(mydata4_2, region_data, by="Country")
mydata4_2 <- rename(mydata4_2, Region = Regional.indicator)
mydata4_2 <- aggregate(x = mydata4_2$Social.support,
by = list(mydata4_2$Region, mydata4_2$year),
FUN="mean", na.rm=TRUE, na.action=NULL)
mydata4_2 <- rename(mydata4_2, Region = Group.1, year = Group.2, Social_suppor_by_region_year = x)
ggplot(data = mydata4_2, aes(factor(year), y = Social_suppor_by_region_year, fill = Region)) +
geom_bar(position="dodge", stat="identity")+
labs(x = NULL, y = "Average Social Support Score by Region",
title = "Grouped Bar Chart for Social Support in 2005 & 2020") +
scale_fill_brewer(palette = "Set1") +
theme(legend.position="right")