ggplot繪圖系統精華帖~

2021-02-21 數據小魔方

本文轉載自公眾號：每天都有進步

ggplot2 PK 基本繪圖

基本圖形

# Housing price data: read the state-level data set once. (The original
# script read the same CSV a second time before the date conversion; nothing
# modified `housing` in between, so the second read was redundant.)
housing <- read.csv("dataSets/landdata-states.csv")

# Preview the first rows of the first five columns.
head(housing[1:5])

# Convert the date column to a numeric value: characters 1-4 of Date are
# taken as the year and character 5 as the quarter, and Date is then
# replaced by Year + Qrtr / 4.
housing$Year <- as.numeric(substr(housing$Date, 1, 4))
housing$Qrtr <- as.numeric(substr(housing$Date, 5, 5))
housing$Date <- housing$Year + housing$Qrtr / 4

# Base graphics: histogram of home values.
hist(housing$Home.Value)

# ggplot2: the same histogram.
library(ggplot2)

ggplot(housing, aes(x = Home.Value)) +
  geom_histogram()

複雜繪圖

# Base graphics: draw the MA series first, overlay the TX series in red,
# then build the legend by hand.
plot(
  Home.Value ~ Date,
  data = subset(housing, State == "MA")
)
points(
  Home.Value ~ Date,
  data = subset(housing, State == "TX"),
  col = "red"
)
legend(
  1975, 3e5,
  legend = c("MA", "TX"),
  title = "State",
  col = c("black", "red"),
  pch = c(1, 1)
)

# ggplot2: the same comparison; mapping State to color produces the legend.
ggplot(
  data = subset(housing, State %in% c("MA", "TX")),
  mapping = aes(x = Date, y = Home.Value, color = State)
) +
  geom_point()

ggplot2 之美

#散點圖

# Keep only the rows whose Date equals 2001.25.
hp2001Q1 <- subset(housing, Date == 2001.25)

# Scatter plot of structure cost against land value.
ggplot(
  data = hp2001Q1,
  mapping = aes(x = Land.Value, y = Structure.Cost)
) +
  geom_point()

# Same plot with the land value log-transformed.
ggplot(
  data = hp2001Q1,
  mapping = aes(x = log(Land.Value), y = Structure.Cost)
) +
  geom_point()

# Linear fit: store the fitted values of Structure.Cost ~ log(Land.Value)
# as a new column so they can be drawn as a line.
hp2001Q1$pred.SC <- predict(
  lm(Structure.Cost ~ log(Land.Value), data = hp2001Q1)
)

# Base plot reused by the following examples.
p1 <- ggplot(
  data = hp2001Q1,
  mapping = aes(x = log(Land.Value), y = Structure.Cost)
)

# Points colored by home value, plus the fitted line.
p1 +
  geom_point(mapping = aes(color = Home.Value)) +
  geom_line(mapping = aes(y = pred.SC))

# Smoothed fit: let geom_smooth() compute and draw the curve.
p1 +
  geom_point(mapping = aes(color = Home.Value)) +
  geom_smooth()

# Label each data point with its state.
p1 +
  geom_point() +
  geom_text(mapping = aes(label = State), size = 3)

# Same labels drawn with ggrepel's geom_text_repel().
library(ggrepel)

p1 +
  geom_point() +
  geom_text_repel(mapping = aes(label = State), size = 3)

# Fixed appearance for all points: size and color are constants, so both are
# set outside aes(). Inside aes() the constant 2 would be treated as a data
# mapping, producing a spurious "size" legend instead of setting size 2.
p1 +
  geom_point(size = 2, color = "red")

# Map color to the home value and shape to the region.
p1 +
  geom_point(aes(color = Home.Value, shape = region))

# Economist data set.
dat <- read.csv(file = "dataSets/EconomistData.csv")
head(dat)

# Scatter plot of CPI against HDI (Human Development Index), with the point
# size mapped to HDI.Rank.
ggplot(
  data = dat,
  mapping = aes(x = CPI, y = HDI, size = HDI.Rank)
) +
  geom_point()

# Change the point color: a constant, so it is set outside aes().
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point(color = "blue")

# Map the point color to the region.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region))

# Rename the x axis, the y axis and the legend through the scale functions.
ggplot(
  data = dat,
  mapping = aes(x = CPI, y = HDI, color = Region)
) +
  geom_point() +
  scale_x_continuous(name = "Corruption Perception Index") +
  scale_y_continuous(name = "Human Development Index") +
  scale_color_discrete(name = "Region of the world")

# Same plot with a hand-picked color for each region.
ggplot(
  data = dat,
  mapping = aes(x = CPI, y = HDI, color = Region)
) +
  geom_point() +
  scale_x_continuous(name = "Corruption Perception Index") +
  scale_y_continuous(name = "Human Development Index") +
  scale_color_manual(
    name = "Region of the world",
    values = c(
      "#24576D",
      "#099DD7",
      "#28AADC",
      "#248E84",
      "#F2583F",
      "#96503F"
    )
  )

# Change the point size: a constant size of 2, color mapped to region.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region), size = 2)

# Map the point size to the HDI rank instead.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point(mapping = aes(color = Region, size = HDI.Rank))

# Fitted curves: default geom_smooth().
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_smooth()

# Linear fit.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_smooth(method = "lm")

# Fitted curve without the interval band: draw the "smooth" stat with a
# plain line geom.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_line(stat = "smooth", method = "loess")

# Adjust the smoothing span (0.4 here).
ggplot(data = dat, mapping = aes(x = CPI, y = HDI)) +
  geom_point() +
  geom_smooth(span = .4)

ggplot2 之統計轉化

# Histogram of home values with the default binning.
p2 <- ggplot(data = housing, mapping = aes(x = Home.Value))
p2 +
  geom_histogram()

# Histogram with the bin stat spelled out and a bin width of 4000.
p2 +
  geom_histogram(stat = "bin", binwidth = 4000)

# Regroup the data with aggregate(): mean Home.Value per State.
housing.sum <- aggregate(
  housing["Home.Value"],
  by = housing["State"],
  FUN = mean
)
rbind(head(housing.sum), tail(housing.sum))

# Bar chart of the pre-computed means (stat = "identity" uses y as given).
ggplot(
  data = housing.sum,
  mapping = aes(x = State, y = Home.Value)
) +
  geom_bar(stat = "identity")

ggplot2 之尺度轉化

# Home price index by US state: base plot with the legend on top and
# small axis text.
p3 <- ggplot(
  data = housing,
  mapping = aes(x = State, y = Home.Price.Index)
) +
  theme(
    legend.position = "top",
    axis.text = element_text(size = 6)
  )

# Jittered, semi-transparent points colored by date. The outer parentheses
# print the plot as well as assigning it to p4.
(p4 <- p3 +
  geom_point(
    mapping = aes(color = Date),
    position = position_jitter(width = 0.25, height = 0),
    alpha = 0.5,
    size = 1.5
  ))

# Rename the x axis and relabel the breaks of the color legend.
p4 +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_continuous(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13")
  )

# Color gradient running from one color (blue) to another (red).
p4 +
  scale_x_discrete(name = "State Abbreviation") +
  scale_color_continuous(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13"),
    low = "blue",
    high = "red"
  )

# Same gradient using the muted() colors from the scales package.
library(scales)

p4 +
  scale_color_continuous(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13"),
    low = muted("blue"),
    high = muted("red")
  )

# Three-color gradient with a gray midpoint at 1994.
p4 +
  scale_color_gradient2(
    name = "",
    breaks = c(1976, 1994, 2013),
    labels = c("'76", "'94", "'13"),
    low = muted("blue"),
    mid = "gray60",
    high = muted("red"),
    midpoint = 1994
  )

ggplot2 之分面

# Home value over time, one colored line per state.
p5 <- ggplot(data = housing, mapping = aes(x = Date, y = Home.Value))
p5 +
  geom_line(mapping = aes(color = State))

# The same curves shown as one facet per state, ten panels per row. The
# outer parentheses print the plot as well as reassigning p5.
(p5 <- p5 +
  geom_line() +
  facet_wrap(~State, ncol = 10))

# Change the theme of the faceted plot.

# Black lines.
p5 +
  theme_linedraw()

# Light lines.
p5 +
  theme_light()

# Fancier theme 1: minimal theme with turquoise text.
p5 +
  theme_minimal() +
  theme(text = element_text(color = "turquoise"))

# Fancier theme 2: a custom theme built on theme_bw(); muted() comes from
# the scales package.
library(scales)

theme_new <- theme_bw() +
  theme(
    plot.background = element_rect(size = 1, color = "blue", fill = "black"),
    panel.background = element_rect(fill = "pink"),
    strip.background = element_rect(fill = muted("orange")),
    text = element_text(size = 12, color = "ivory"),
    axis.text.x = element_text(colour = "red"),
    axis.text.y = element_text(colour = "purple")
  )

p5 +
  theme_new

分別繪製曲線

# Method 1: average both value columns per Date, then add one geom_line()
# layer per series with a fixed color.
housing.byyear <- aggregate(
  cbind(Home.Value, Land.Value) ~ Date,
  data = housing,
  FUN = mean
)

ggplot(data = housing.byyear, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = Home.Value), color = "red") +
  geom_line(mapping = aes(y = Land.Value), color = "blue")

# Method 2: reshape to long format (one row per Date and series) and map the
# series name to color, so a single geom_line() draws both curves.
# pivot_longer() is used in place of gather(), which tidyr has superseded;
# the resulting columns are still named "type" and "value".
library(tidyr)

home.land.byyear <- pivot_longer(
  housing.byyear,
  cols = c(Home.Value, Land.Value),
  names_to = "type",
  values_to = "value"
)

ggplot(home.land.byyear, aes(x = Date, y = value, color = type)) +
  geom_line()

有趣的點符

# The 25 point symbols: z holds 1..25 and is used directly as the shape
# code via scale_shape_identity(). x is 1:5 repeated five times.
df2 <- data.frame(x = rep(1:5, times = 5), y = 1:25, z = 1:25)

s <- ggplot(data = df2, mapping = aes(x = x, y = y))
s +
  geom_point(mapping = aes(shape = z), size = 4) +
  scale_shape_identity()

綜合案例分析之經濟學人圖片

# Packages for the case study: ggrepel for non-overlapping labels, grid for
# the annotations drawn on top of the finished plot.
library(ggrepel)
library(grid)

# Re-read the Economist data so the case study starts from the raw file.
dat <- read.csv(file = "dataSets/EconomistData.csv")

# Countries to label on the final plot; names are matched against the
# Country column with %in%. "Spain" was misspelled "Spane" in the original
# list, so Spain was never matched and never labelled.
pointsToLabel <- c(
  "Russia", "Venezuela", "Iraq", "Myanmar", "Sudan",
  "Afghanistan", "Congo", "Greece", "Argentina", "Brazil",
  "India", "Italy", "China", "South Africa", "Spain",
  "Botswana", "Cape Verde", "Bhutan", "Rwanda", "France",
  "United States", "Germany", "Britain", "Barbados",
  "Norway", "Japan", "New Zealand", "Singapore"
)

# Recode Region as a factor with display labels, in the order given here.
# NOTE(review): the level strings (including "East EU Cemt Asia") are
# presumably spelled exactly as in the CSV -- verify against the data before
# "correcting" them, since unmatched levels become NA.
dat$Region <- factor(
  dat$Region,
  levels = c(
    "EU W. Europe",
    "Americas",
    "Asia Pacific",
    "East EU Cemt Asia",
    "MENA",
    "SSA"
  ),
  labels = c(
    "OECD",
    "Americas",
    "Asia &\nOceania",
    "Central &\nEastern Europe",
    "Middle East &\nnorth Africa",
    "Sub-Saharan\nAfrica"
  )
)

# R-squared of the log-linear model HDI ~ log(CPI), used in the annotation.
mR2 <- summary(lm(HDI ~ log(CPI), data = dat))$r.squared

# Economist-style chart of HDI against CPI, colored by region.
ggplot(data = dat, mapping = aes(x = CPI, y = HDI, color = Region)) +
  # A single red trend line across all regions (group = 1), fitted as
  # y ~ log(x), without the interval band.
  geom_smooth(
    mapping = aes(group = 1),
    method = "lm",
    formula = y ~ log(x),
    se = FALSE,
    color = "red"
  ) +
  # Points drawn with shape 1 and a thicker stroke.
  geom_point(shape = 1, size = 2.5, stroke = 1.25) +
  # Label only the countries listed in pointsToLabel.
  geom_text_repel(
    mapping = aes(label = Country),
    data = subset(dat, Country %in% pointsToLabel),
    color = "gray20",
    force = 10
  ) +
  scale_x_continuous(
    name = "Corruption Perceptions Index, 2011 (10=least corrupt)",
    limits = c(.9, 10.5),
    breaks = 1:10
  ) +
  scale_y_continuous(
    name = "Human Development Index, 2011 (1=Best)",
    limits = c(0.2, 1.0),
    breaks = seq(0.2, 1.0, by = 0.1)
  ) +
  scale_color_manual(
    name = "",
    values = c(
      "#24576D",
      "#099DD7",
      "#28AADC",
      "#248E84",
      "#F2583F",
      "#96503F"
    )
  ) +
  ggtitle("Corruption and Human development") +
  theme_minimal() +
  # NOTE(review): recent ggplot2 releases deprecate `size` on element_line()
  # in favour of `linewidth`; left unchanged here -- confirm the installed
  # version before switching.
  theme(
    text = element_text(color = "gray20"),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.justification = 0.1,
    legend.text = element_text(size = 11, color = "gray10"),
    axis.text = element_text(face = "italic"),
    axis.title.x = element_text(vjust = -1),
    axis.title.y = element_text(vjust = 2),
    axis.ticks.y = element_blank(),
    axis.line = element_line(color = "gray40", size = 0.5),
    axis.line.y = element_blank(),
    panel.grid.major = element_line(color = "gray50", size = 0.5),
    panel.grid.major.x = element_blank()
  )

# Annotations drawn with grid on top of the current plot.

# Source note, left-justified at x = .02, y = .03.
grid.text(
  "Sources: Transparency International; UN Human Development Report",
  x = .02,
  y = .03,
  just = "left",
  draw = TRUE
)

# Short red segment acting as the legend key for the trend line.
grid.segments(
  x0 = 0.81,
  x1 = 0.825,
  y0 = 0.90,
  y1 = 0.90,
  gp = gpar(col = "red"),
  draw = TRUE
)

# R-squared label next to the segment; as.integer() truncates the
# percentage toward zero rather than rounding it.
grid.text(
  paste0("R² = ", as.integer(mR2 * 100), "%"),
  x = 0.835,
  y = 0.90,
  just = "left",
  gp = gpar(col = "gray20"),
  draw = TRUE
)

歡迎大家關注原文作者微信公眾號：

ggplot繪圖系統精華帖~

相關焦點

ggplot2包的繪圖邏輯簡介

R語言繪圖之ggplot2

Python數據可視化--在Python中調用ggplot進行繪圖

R語言 ggplot2 繪圖入門,看完你就理解ggplot2的繪圖邏輯了

高階可視化繪圖系統:ggplot2入門

ggplot2|詳解八大基本繪圖要素

ggplot2|theme主題設置,詳解繪圖優化-「精雕細琢」

R繪圖之ggplot2—添加注釋

R語言高階可視化繪圖系統:ggplot2入門 | 第8講

乾貨 | 繪圖的基本元素(ggplot2實現)

R語言可視化——ggplot圖表中的線條

ggplot2|繪製GO富集柱形圖

微課|ggplot2: 折線圖

Python語言plotnine VS R語言ggplot2

R語言統計與繪圖:怎麼加載Windows系統字體到圖形上?

如何在Python裡用ggplot2繪圖

R語言——ggplot2的繪圖邏輯

「繪圖之王」爭霸賽——Excel才是繪圖王道

基於ggplot2包繪製SCI學術箱線圖的保姆級教程

超棒教程:如何用ggplot2繪製漂亮的統計圖形

ggplot繪圖系統精華帖~

相關焦點

ggplot2包的繪圖邏輯簡介

R語言繪圖之ggplot2

Python數據可視化--在Python中調用ggplot進行繪圖

R語言 ggplot2 繪圖入門,看完你就理解ggplot2的繪圖邏輯了

高階可視化繪圖系統:ggplot2入門

ggplot2|詳解八大基本繪圖要素

ggplot2|theme主題設置,詳解繪圖優化-「精雕細琢」

R繪圖之ggplot2—添加注釋

R語言 高階可視化繪圖系統:ggplot2入門 | 第8講

乾貨 | 繪圖的基本元素(ggplot2實現)

R語言可視化——ggplot圖表中的線條

ggplot2|繪製GO富集柱形圖

微課|ggplot2: 折線圖

Python語言plotnine VS R語言ggplot2

R語言統計與繪圖:怎麼加載Windows系統字體到圖形上?

如何在Python裡用ggplot2繪圖

R語言——ggplot2的繪圖邏輯

「繪圖之王」爭霸賽——Excel才是繪圖王道

基於ggplot2包繪製SCI學術箱線圖的保姆級教程

超棒教程:如何用ggplot2繪製漂亮的統計圖形

R語言高階可視化繪圖系統:ggplot2入門 | 第8講