本文轉載自公眾號:每天都有進步
ggplot2 PK 基本繪圖
基本圖形
# Housing price data: read the CSV once and preview the first five columns.
housing <- read.csv("dataSets/landdata-states.csv")
head(housing[1:5])
# Convert Date to a numeric value: characters 1-4 are taken as the year and
# character 5 as the quarter, then combined as Year + Qrtr / 4.
# NOTE(review): assumes Date is encoded as YYYYQ in the CSV -- confirm.
housing$Year <- as.numeric(substr(housing$Date, 1, 4))
housing$Qrtr <- as.numeric(substr(housing$Date, 5, 5))
housing$Date <- housing$Year + housing$Qrtr / 4
# Base graphics histogram of home values
hist(housing$Home.Value)
# The same histogram with ggplot2
library(ggplot2)
ggplot(housing, aes(x = Home.Value)) +
  geom_histogram()
複雜繪圖
# Base graphics: plot MA first, overlay TX in red, then build the legend by
# hand.
plot(Home.Value ~ Date,
     data = subset(housing, State == "MA"))
points(Home.Value ~ Date,
       data = subset(housing, State == "TX"),
       col = "red")
legend(x = 1975, y = 3e5,
       legend = c("MA", "TX"),
       title = "State",
       col = c("black", "red"),
       pch = c(1, 1))
# ggplot2: one call, with colour mapped to State.
ggplot(subset(housing, State %in% c("MA", "TX")),
       aes(x = Date, y = Home.Value, color = State)) +
  geom_point()
ggplot2 之美
# Scatter plot: keep only the rows whose Date equals 2001.25 and plot
# Structure.Cost against Land.Value.
hp2001Q1 <- subset(housing, Date == 2001.25)
ggplot(hp2001Q1,
       aes(y = Structure.Cost, x = Land.Value)) +
  geom_point()
# Same scatter plot with Land.Value on a log scale
ggplot(hp2001Q1,
       aes(y = Structure.Cost, x = log(Land.Value))) +
  geom_point()
# Linear fit: store the fitted values of Structure.Cost ~ log(Land.Value)
# in a new column so they can be drawn as a line.
hp2001Q1$pred.SC <- predict(
  lm(Structure.Cost ~ log(Land.Value), data = hp2001Q1)
)
p1 <- ggplot(hp2001Q1, aes(x = log(Land.Value), y = Structure.Cost))
p1 +
  geom_point(aes(color = Home.Value)) +
  geom_line(aes(y = pred.SC))
# Smoothed fit computed by geom_smooth() instead of a precomputed column
p1 +
  geom_point(aes(color = Home.Value)) +
  geom_smooth()
# Label each point with its State value
p1 +
  geom_point() +
  geom_text(aes(label = State), size = 3)
# Same labels drawn with geom_text_repel() from ggrepel, so individual
# points are easier to identify
library("ggrepel")
p1 +
  geom_point() +
  geom_text_repel(aes(label = State), size = 3)
# Give every point a fixed size and colour. Constants are set outside aes();
# aes() is only for mapping variables, and a constant placed inside it is
# treated as data (extra legend, size chosen by the scale rather than 2).
p1 +
  geom_point(size = 2,
             color = "red")
# Map colour to home value and point shape to region
p1 +
  geom_point(aes(color = Home.Value, shape = region))
# Economist data set
dat <- read.csv("dataSets/EconomistData.csv")
head(dat)
# Scatter plot of CPI against HDI (Human Development Index), with point
# size mapped to HDI.Rank
ggplot(dat, aes(x = CPI, y = HDI, size = HDI.Rank)) +
  geom_point()
# Set one fixed colour for all points
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point(color = "blue")
# Map point colour to Region
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point(aes(color = Region))
# Rename the x axis, y axis and legend through their scales
ggplot(dat, aes(x = CPI, y = HDI, color = Region)) +
  geom_point() +
  scale_x_continuous(name = "Corruption Perception Index") +
  scale_y_continuous(name = "Human Development Index") +
  scale_color_discrete(name = "Region of the world")
# Assign a hand-picked colour to each region
ggplot(dat, aes(x = CPI, y = HDI, color = Region)) +
  geom_point() +
  scale_x_continuous(name = "Corruption Perception Index") +
  scale_y_continuous(name = "Human Development Index") +
  scale_color_manual(
    name = "Region of the world",
    values = c("#24576D", "#099DD7", "#28AADC",
               "#248E84", "#F2583F", "#96503F")
  )
# Set a fixed point size (outside aes) while colour stays mapped to Region
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point(aes(color = Region), size = 2)
# Map point size to the HDI rank
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point(aes(color = Region, size = HDI.Rank))
# Add a fitted curve with geom_smooth()'s default settings
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_smooth()
# Linear fit
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_smooth(method = "lm")
# Draw only the loess curve: geom_line() with the smooth stat, so no
# interval band is drawn
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_line(stat = "smooth", method = "loess")
# Change the amount of smoothing by passing span = .4 to the smoother
ggplot(dat, aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_smooth(span = .4)
ggplot2 之 統計轉化
# Histogram of home values using the default binning
p2 <- ggplot(housing, aes(x = Home.Value))
p2 +
  geom_histogram()
# Histogram with the bin stat named explicitly and a bin width of 4000
p2 +
  geom_histogram(stat = "bin", binwidth = 4000)
# Summarise with aggregate(): mean Home.Value per State, then show the
# first and last rows of the result
housing.sum <- aggregate(housing["Home.Value"],
                         housing["State"],
                         FUN = mean)
rbind(head(housing.sum), tail(housing.sum))
# Bar chart of the precomputed means (stat = "identity" uses y as given)
ggplot(housing.sum, aes(x = State, y = Home.Value)) +
  geom_bar(stat = "identity")
ggplot2 之 尺度轉化
# Home price index by state: base plot with the legend on top and small
# axis text
p3 <- ggplot(housing, aes(x = State, y = Home.Price.Index)) +
  theme(legend.position = "top",
        axis.text = element_text(size = 6))
# Jittered, semi-transparent points coloured by Date; the outer parentheses
# print the plot as well as assigning it
(p4 <- p3 +
   geom_point(aes(color = Date),
              alpha = 0.5,
              size = 1.5,
              position = position_jitter(width = 0.25, height = 0)))
# Rename the x axis and relabel the colour legend breaks
p4 +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_continuous(name = "",
                         breaks = c(1976, 1994, 2013),
                         labels = c("'76", "'94", "'13"))
# Colour gradient running from one colour (blue) to another (red)
p4 +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_continuous(name = "",
                         breaks = c(1976, 1994, 2013),
                         labels = c("'76", "'94", "'13"),
                         low = "blue",
                         high = "red")
# Same gradient with muted() end colours from the scales package
library("scales")
p4 +
  scale_color_continuous(name = "",
                         breaks = c(1976, 1994, 2013),
                         labels = c("'76", "'94", "'13"),
                         low = muted("blue"),
                         high = muted("red"))
# Gradient with a gray midpoint set at 1994
p4 +
  scale_color_gradient2(name = "",
                        breaks = c(1976, 1994, 2013),
                        labels = c("'76", "'94", "'13"),
                        low = muted("blue"),
                        high = muted("red"),
                        mid = "gray60",
                        midpoint = 1994)
ggplot2 之 分面
# Home value over time, one coloured line per state in a single panel
p5 <- ggplot(housing, aes(x = Date, y = Home.Value))
p5 +
  geom_line(aes(color = State))
# The same series as facets: one panel per state, ten panels per row.
# p5 is reassigned here, so the theme examples below are all faceted.
(p5 <- p5 +
   geom_line() +
   facet_wrap(~State, ncol = 10))
# Built-in themes applied to the faceted plot
# theme_linedraw
p5 +
  theme_linedraw()
# theme_light
p5 +
  theme_light()
# Custom look 1: minimal theme with turquoise text
p5 +
  theme_minimal() +
  theme(text = element_text(color = "turquoise"))
# Custom look 2: a reusable theme object built on theme_bw()
# NOTE(review): recent ggplot2 releases deprecate `size` in element_rect()
# in favour of `linewidth`; kept unchanged here for older versions.
library(scales)
theme_new <- theme_bw() +
  theme(
    plot.background = element_rect(size = 1, color = "blue", fill = "black"),
    text = element_text(size = 12, color = "ivory"),
    axis.text.y = element_text(colour = "purple"),
    axis.text.x = element_text(colour = "red"),
    panel.background = element_rect(fill = "pink"),
    strip.background = element_rect(fill = muted("orange"))
  )
p5 +
  theme_new
分別繪製曲線
# Approach 1: average Home.Value and Land.Value per Date, then add one
# geom_line() layer per column with a fixed colour each.
housing.byyear <- aggregate(cbind(Home.Value, Land.Value) ~ Date,
                            data = housing,
                            FUN = mean)
ggplot(housing.byyear, aes(x = Date)) +
  geom_line(aes(y = Home.Value), color = "red") +
  geom_line(aes(y = Land.Value), color = "blue")
# Approach 2: reshape to long form (one row per Date and series) and map
# colour to the series name, which also yields a legend.
# pivot_longer() replaces the superseded gather(); column names are the same.
library(tidyr)
home.land.byyear <- pivot_longer(housing.byyear,
                                 cols = c(Home.Value, Land.Value),
                                 names_to = "type",
                                 values_to = "value")
ggplot(home.land.byyear,
       aes(x = Date, y = value, color = type)) +
  geom_line()
有趣的點符
# Show point shapes 1 to 25: x = 1:5 is recycled across the 25 rows, and
# scale_shape_identity() uses the values in z directly as shape codes.
df2 <- data.frame(x = 1:5, y = 1:25, z = 1:25)
s <- ggplot(df2, aes(x = x, y = y))
s +
  geom_point(aes(shape = z), size = 4) +
  scale_shape_identity()
綜合案例分析之經濟學人圖片
# Setup for the Economist-style chart: packages, data, the countries to
# label, and display names for the regions.
library("ggrepel")
library(grid)
dat <- read.csv("dataSets/EconomistData.csv")
# Countries that receive a text label. An entry only has an effect if it
# matches a value of dat$Country exactly ("Spane" was a typo for "Spain").
pointsToLabel <- c("Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
                   "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
                   "India", "Italy", "China", "South Africa", "Spain",
                   "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
                   "United States", "Germany", "Britain", "Barbados",
                   "Norway", "Japan", "New Zealand", "Singapore")
# Recode Region as a factor with a fixed level order and display labels.
# NOTE(review): the levels must match the spellings stored in the CSV
# (including "East EU Cemt Asia") -- verify before changing any of them.
dat$Region <- factor(dat$Region,
                     levels = c("EU W. Europe",
                                "Americas",
                                "Asia Pacific",
                                "East EU Cemt Asia",
                                "MENA",
                                "SSA"),
                     labels = c("OECD",
                                "Americas",
                                "Asia &\nOceania",
                                "Central &\nEastern Europe",
                                "Middle East &\nnorth Africa",
                                "Sub-Saharan\nAfrica"))
# R-squared of HDI regressed on log(CPI)
mR2 <- summary(lm(HDI ~ log(CPI), data = dat))$r.squared
# Economist-style chart. Layers, in drawing order:
#   1. one red log-linear trend line across all regions (group = 1), no band
#   2. open circles coloured by Region
#   3. repelled labels for the countries listed in pointsToLabel
# NOTE(review): recent ggplot2 releases deprecate `size` in element_line()
# in favour of `linewidth`; kept unchanged here for older versions.
ggplot(dat, aes(x = CPI, y = HDI, color = Region)) +
  geom_smooth(aes(group = 1),
              method = "lm",
              formula = y ~ log(x),
              se = FALSE,
              color = "red") +
  geom_point(shape = 1, size = 2.5, stroke = 1.25) +
  geom_text_repel(aes(label = Country),
                  color = "gray20",
                  data = subset(dat, Country %in% pointsToLabel),
                  force = 10) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(.9, 10.5),
    breaks = 1:10
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1.0),
    breaks = seq(0.2, 1.0, by = 0.1)
  ) +
  scale_color_manual(
    name = "",
    values = c("#24576D", "#099DD7", "#28AADC",
               "#248E84", "#F2583F", "#96503F")
  ) +
  ggtitle("Corruption and Human development") +
  theme_minimal() +
  theme(
    text = element_text(color = "gray20"),
    legend.position = c("top"),
    legend.direction = "horizontal",
    legend.justification = 0.1,
    legend.text = element_text(size = 11, color = "gray10"),
    axis.text = element_text(face = "italic"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 2),
    axis.ticks.y = element_blank(),
    axis.line = element_line(color = "gray40", size = 0.5),
    axis.line.y = element_blank(),
    panel.grid.major = element_line(color = "gray50", size = 0.5),
    panel.grid.major.x = element_blank()
  )
# Annotations drawn with grid on top of the current plot; x and y are the
# position values passed to grid (default units).
# Source note near the bottom-left corner
grid.text("Sources: Transparency International; UN Human Development Report",
          x = .02,
          y = .03,
          just = "left",
          draw = TRUE)
# Short red segment that serves as the legend key for the trend line
grid.segments(x0 = 0.81,
              x1 = 0.825,
              y0 = 0.90,
              y1 = 0.90,
              gp = gpar(col = "red"),
              draw = TRUE)
# R-squared label placed next to the key; as.integer() truncates the
# percentage to a whole number
grid.text(paste0("R² = ", as.integer(mR2 * 100), "%"),
          x = 0.835,
          y = 0.90,
          gp = gpar(col = "gray20"),
          draw = TRUE,
          just = "left")
歡迎大家關注原文作者微信公眾號: