作者:李譽輝
四川大學在讀研究生
簡介
對於多個變量之間的相關關係,常常使用相關關係圖來可視化,R自帶有pairs()函數,
可以畫相關關係圖,但是比較複雜,我們先介紹基於ggplot2的GGally包。
等介紹完,再介紹pairs()函數。
ggmatrix()可以將多個ggplot2繪圖對象,按照矩陣進行排列。
# Load ggplot2 and the `tips` dataset shipped with the reshape package.
library(ggplot2)
data(tips, package = "reshape")

# Preview the first rows of the data.
head(tips)

# g1: density of total_bill, filled by sex.
g1 <- ggplot(tips, aes(x = total_bill, fill = sex)) +
  geom_density(show.legend = FALSE)

# g2: stacked histogram of total_bill, filled by sex.
g2 <- ggplot(tips, aes(x = total_bill, fill = sex)) +
  geom_histogram(position = position_stack(), show.legend = FALSE) +
  facet_grid(rows = vars(time)) # facet into rows by the `time` variable

# g3: scatter plot of tip against total_bill, coloured by sex.
g3 <- ggplot(tips, aes(x = total_bill, y = tip, color = sex)) +
  geom_point(show.legend = FALSE)
library(ggplot2)

# g4: box plot of total_bill per time of day, filled by sex.
g4 <- ggplot(tips, aes(x = time, y = total_bill, fill = sex)) +
  geom_boxplot(show.legend = FALSE)

# g5: stacked bar chart of observation counts per time of day.
g5 <- ggplot(tips, aes(x = time, fill = sex)) +
  geom_bar(position = position_stack(), show.legend = FALSE)

# g6: stacked histogram of tip, flipped so tip runs along the vertical
# axis, with one facet column per time of day.
g6 <- ggplot(tips, aes(x = tip, fill = sex)) +
  geom_histogram(position = position_stack(), show.legend = FALSE) +
  coord_flip() +
  facet_grid(cols = vars(time))
library(ggplot2)
library(dplyr)
library(tibble)

# First plot: a text panel showing the correlation between total_bill and
# tip, overall and separately for each sex.
text_1 <- round(cor(tips$total_bill, tips$tip), 3)
# as_tibble() replaces the deprecated as.tibble().
tips_female <- as_tibble(tips) %>%
  filter(sex == "Female") %>%
  as.data.frame()
tips_male <- as_tibble(tips) %>%
  filter(sex == "Male") %>%
  as.data.frame()
text_2 <- round(cor(tips_female$total_bill, tips_female$tip), 3)
text_3 <- round(cor(tips_male$total_bill, tips_male$tip), 3)
mytext <- c(text_1, text_2, text_3)
mytext <- paste0(c("Cor", "Female", "Male"), ":", mytext)
# One row per label; x/y are the positions at which each label is drawn.
mytext <- data.frame(
  text = mytext,
  x = 5,
  y = c(6, 4, 2),
  stringsAsFactors = FALSE
)

# The per-sex labels (rows 2-3) are coloured by their text; the overall
# label (row 1) is drawn in a second layer in black.
g7 <- ggplot(
  data = mytext[-1, ],
  aes(x = x, y = y, label = text, color = text)
) +
  geom_text(show.legend = FALSE) +
  geom_text(
    data = mytext[1, ],
    aes(x = x, y = y, label = text),
    color = "black"
  )

# Drop the intermediates; g7 was built from its own subsets of mytext.
rm(text_1, tips_female, tips_male, text_2, text_3, mytext)

# Second plot: box plot of tip per time of day, flipped.
g8 <- ggplot(tips, aes(x = time, y = tip, fill = sex)) +
  geom_boxplot(show.legend = FALSE) +
  coord_flip()

# Third plot: density of tip, filled by sex.
g9 <- ggplot(tips, aes(x = tip, fill = sex)) +
  geom_density(show.legend = FALSE)
library(customLayout)

# 3 x 3 layout; matrix() fills column-wise, so cells 1-3 form the first
# column.
mylay <- lay_new(
  mat = matrix(1:9, ncol = 3)
)

plot_list <- list(g1, g2, g3, g4, g5, g6, g7, g8, g9)

# Draw the nine plots into the layout.
lay_grid(plot_list, mylay)

# The plots remain available through plot_list, so the individual objects
# and the layout can be removed.
rm(g1, g2, g3, g4, g5, g6, g7, g8, g9, mylay)
library(GGally)

# Arrange the nine prepared plots in a 3 x 3 matrix, filled column by
# column (byrow = FALSE).
gg_m <- ggmatrix(
  plots = plot_list,
  nrow = 3, ncol = 3,
  xAxisLabels = c("Total Bill", "Time of Day", "Tip"),
  yAxisLabels = c("Total Bill", "Time of Day", "Tip"),
  byrow = FALSE,
  title = "ggmatrix合併圖形"
)

# Apply a theme to the whole matrix.
gg_m + theme_bw()

# Extract the single plot in row 1, column 2.
gg_m[1, 2]

rm(plot_list, gg_m)
GGally通過添加幾個函數來擴展ggplot2,以降低geom與轉換數據組合的複雜性。
其中一些功能包括配對圖矩陣,散點圖矩陣,平行坐標圖,生存圖,以及繪製網絡的幾個函數。
語法:
# Usage synopsis of ggpairs() (a signature listing, not meant to be run).
ggpairs(data, mapping = NULL, columns = 1:ncol(data), title = NULL,
  upper = list(continuous = "cor", combo = "box_no_facet", discrete =
  "facetbar", na = "na"), lower = list(continuous = "points", combo =
  "facethist", discrete = "facetbar", na = "na"), diag = list(continuous =
  "densityDiag", discrete = "barDiag", na = "naDiag"), params = NULL, ...,
  xlab = NULL, ylab = NULL, axisLabels = c("show", "internal", "none"),
  columnLabels = colnames(data[columns]), labeller = "label_value",
  switch = NULL, showStrips = NULL, legend = NULL,
  cardinality_threshold = 15, progress = NULL,
  legends = stop("deprecated"))
關鍵參數:
mapping, 表示要疊加到x,y上的aes()映射變量,這裡是全局映射。
columns, 表示選擇要繪圖的列,可以用變量索引值指定,也可以用變量名指定。
columnLabels, 指定矩陣列名稱。
title, xlab, ylab, 表示指定標題和坐標軸名稱。
lower,upper,表示指定下三角和上三角的plot類型,列表傳參。
diag,表示指定對角線的plot類型,列表傳參。
axisLabels, 指定變量名稱的顯示位置,默認顯示在外側,
"internal"則顯示在內側,"none"則不顯示。
labeller, 表示指定分面標籤,
switch, 表示指定分面標籤位置,與ggplot2::facet_grid中一致,默認在頂部和右側,
若switch = "x",則顯示在底部和右側,若switch = "y"則顯示在頂部和左側,
若switch = "both"則顯示在底部和左側。
showStrips, 布爾值,決定是否顯示plots的條帶,默認NULL只顯示頂部和右側的條帶。
TRUE則顯示所有的條帶,FALSE則不顯示所有的條帶。
legend, 默認NULL不顯示,可以通過theme(legend.position = "bottom")調整圖例的位置。
有3種指定圖例類型的方式:
長度為2的數字向量,表示給矩陣所在的行和列增加圖例。如c(2,3)表示第2行第3列增加圖例。
長度為1的數字向量,表示根據矩陣的順序,給相應的panel添加圖例,
如legend=3表示給第1行第3列增加圖例。
預先使用grab_legend()提取ggplot2對象的圖例,然後指定給legend。
cardinality_threshold, 表示允許因子變量的最大因子水平數量,默認最多15個因子水平。NULL則不檢查因子水平數量。
progress, 表示是否顯示進度條,默認NULL當超過15個plots時顯示進度條,
對繪圖結果沒有任何影響,不需要關注。
TRUE則顯示進度條,FALSE則不顯示進度條,
也可用ggmatrix_progress()生成進度條,然後指定。
plot類型:
通過5個參數控制plot類型:continuous,combo,discrete, na, mapping
continuous, 表示如果變量x,y都是連續的,應該是什麼plot。
對於lower和upper參數:
可以是
"points", "smooth","smooth_loess", "density", "cor", "blank"。
對於diag參數: 可以是
"densityDiag", "barDiag", "blankDiag"
combo, 表示如果變量一個連續,一個離散,應該是什麼plot。
只能用於lower和upper不能用於diag。
離散變量只能計數,不能映射坐標,所以可能存在坐標翻轉。
可以是
"box", "box_no_facet", "dot",
"dot_no_facet",
"facethist", "facetdensity",
"denstrip", "blank"
discrete, 表示2個變量都是離散的,應該是什麼plot。
對於upper和lower參數:
可以是:
"facetbar", "ratio", "blank"。
對於diag參數: 可以是"barDiag", "blankDiag"。
na, 表示指定變量為na的情況,
mapping, 表示aes()映射。若指定mapping參數,則疊加到x,y上去。
默認
lower = list(continuous = "points", combo = "facethist", discrete = "facetbar")
默認
upper = list(continuous = "cor", combo = "box_no_facet", discrete = "facetbar")
默認
diag = list(continuous = "densityDiag", discrete = "barDiag")
library(GGally)
library(ggplot2)

# Select the columns to plot by name.
ggpairs(
  tips,
  mapping = aes(color = sex),
  columns = c("total_bill", "time", "tip"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  title = "變量名指定column"
)

# Select the same columns by position (1 = total_bill, 6 = time, 2 = tip).
ggpairs(
  tips,
  mapping = aes(color = sex),
  columns = c(1, 6, 2),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  title = "索引值指定column"
)
自定義lower
一個離散變量,lower的discrete參數無效。
# Customise only the lower triangle; the upper triangle and the diagonal
# are blanked out so the effect of `lower` is visible on its own.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "tip"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  lower = list(
    continuous = "cor",
    combo = "dot_no_facet"
  ),
  upper = list(
    continuous = "blank",
    combo = "blank"
  ),
  diag = list(
    continuous = "blankDiag",
    discrete = "blankDiag"
  ),
  title = "自定義lower\n(lower$continuous = \"cor\", lower$combo = \"dot_no_facet\")"
)
兩個離散變量,lower的continuous參數無效。
# Lower triangle with one continuous and two discrete columns; the upper
# triangle and the diagonal are blanked out.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "sex"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Sex(離散變量)"
  ),
  lower = list(
    combo = "dot_no_facet",
    discrete = "blank"
  ),
  upper = list(
    combo = "blank",
    discrete = "blank"
  ),
  diag = list(
    continuous = "blankDiag",
    discrete = "blankDiag"
  ),
  title = "自定義lower\n(lower$combo = \"dot_no_facet\",lower$discrete = \"blank\" )"
)
一個離散變量,upper的discrete參數無效。
# Customise only the upper triangle; the lower triangle and the diagonal
# are blanked out.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "tip"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  upper = list(
    continuous = "density",
    combo = "dot_no_facet"
  ),
  lower = list(
    continuous = "blank",
    combo = "blank"
  ),
  diag = list(
    continuous = "blankDiag",
    discrete = "blankDiag"
  ),
  title = "自定義upper\n(upper$continuous = \"density\", upper$combo = \"dot_no_facet\")"
)
兩個離散變量,upper的continuous參數無效。
# Upper triangle with one continuous and two discrete columns; the lower
# triangle and the diagonal are blanked out.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "sex"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Sex(離散變量)"
  ),
  upper = list(
    combo = "dot_no_facet",
    discrete = "ratio"
  ),
  lower = list(
    combo = "blank",
    discrete = "blank"
  ),
  diag = list(
    continuous = "blankDiag",
    discrete = "blankDiag"
  ),
  # The title names upper$combo, which is the setting actually used above.
  title = "自定義upper\n(upper$combo = \"dot_no_facet\",upper$discrete = \"ratio\" )"
)
diag沒有combo參數。
# Customise only the diagonal (two continuous columns, one discrete); both
# triangles are blanked out.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "tip"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  diag = list(
    continuous = "barDiag",
    discrete = "blankDiag"
  ),
  lower = list(
    continuous = "blank",
    combo = "blank"
  ),
  upper = list(
    continuous = "blank",
    combo = "blank"
  ),
  title = "自定義diag\n(diag$continuous = \"barDiag\", diag$discrete = \"blankDiag\")"
)

# Same idea with one continuous and two discrete columns.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "sex"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Sex(離散變量)"
  ),
  diag = list(
    continuous = "barDiag",
    discrete = "barDiag"
  ),
  lower = list(
    discrete = "blank",
    combo = "blank"
  ),
  upper = list(
    discrete = "blank",
    combo = "blank"
  ),
  # The title names the diag settings that are actually used above.
  title = "自定義diag\n(diag$continuous = \"barDiag\",diag$discrete = \"barDiag\" )"
)
library(ggplot2)
library(GGally)
data(tips, package = "reshape")

# Baseline: no aesthetic mapping at all.
ggpairs(
  tips,
  columns = c("total_bill", "time", "tip"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  title = "無mapping"
)

# A mapping supplied only for the lower triangle.
ggpairs(
  tips,
  columns = c("total_bill", "time", "tip"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Tip(連續變量)"
  ),
  lower = list(mapping = aes(color = time)),
  title = "自定義lower(lower$mapping = \"time\")"
)

# A different colour mapping for each of lower, upper and diag.
ggpairs(
  tips,
  columns = c("total_bill", "tip", "size"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Tip(連續變量)", "Size(連續變量)"
  ),
  lower = list(
    continuous = "cor",
    mapping = aes(color = sex)
  ),
  upper = list(
    continuous = "cor",
    mapping = aes(color = smoker)
  ),
  diag = list(
    continuous = "barDiag",
    mapping = aes(color = time)
  ),
  title = "自定義lower,upper,diag\n(下三角顏色為sex,上三角顏色為smoker,對角顏色為time)"
)
2個連續變量,1個離散變量。
# Customise lower, upper and diag together for two continuous columns and
# one discrete column.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "tip", "time"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Tip(連續變量)", "Time(離散變量)"
  ),
  lower = list(
    continuous = "cor",
    combo = "dot_no_facet"
  ),
  upper = list(
    continuous = "density",
    combo = "dot_no_facet"
  ),
  diag = list(
    continuous = "barDiag",
    discrete = "blankDiag"
  ),
  title = "自定義lower,upper,diag(兩個連續變量,一個離散變量)"
)
1個連續變量,2個離散變量。
# Customise lower, upper and diag together for one continuous column and
# two discrete columns.
ggpairs(
  tips,
  mapping = aes(color = day),
  columns = c("total_bill", "time", "sex"),
  columnLabels = c(
    "Total_Bill(連續變量)", "Time(離散變量)", "Sex(離散變量)"
  ),
  lower = list(
    combo = "dot_no_facet",
    discrete = "blank"
  ),
  upper = list(
    combo = "dot_no_facet",
    discrete = "ratio"
  ),
  diag = list(
    continuous = "barDiag",
    discrete = "barDiag"
  ),
  title = "自定義lower,upper,diag(一個連續變量,兩個離散變量)"
)
——————————————
往期精彩: