Rのtidyrのpivot_longerとpivot_widerの使い方メモ


はじめに

Rのtidyrpivot_longerpivot_widerには、いろいろ引数が指定できます。Rのサンプルデータを用いて、pivot_longerpivot_widerの使い方を練習してみました。

目次

pivot_longer(), pivot_wider()

Rのtidyrpivot_longerpivot_widerの使い方をRのサンプルデータを用いて練習します。
tidyr
Pivoting
Pivot data from wide to long
Pivot data from long to wide

参考:
https://r4ds.had.co.nz/tidy-data.html#pivoting
https://heavywatal.github.io/rstats/tidyr.html

練習に用いるデータ

Rのサンプルデータirisanscombeを用います。
行番号を表示した方が分かりやすいため、tibble::rowid_to_column()で行番号を列名にしています。

R
library(dplyr)
library(tibble)
library(tidyr)

iris

# 行番号を列名に
iris_tbl <- iris %>% as_tibble() %>% rowid_to_column("id")
iris_tbl
# # A tibble: 150 x 6
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#  1     1          5.1         3.5          1.4         0.2 setosa 
#  2     2          4.9         3            1.4         0.2 setosa 
#  3     3          4.7         3.2          1.3         0.2 setosa 
#  4     4          4.6         3.1          1.5         0.2 setosa 
#  5     5          5           3.6          1.4         0.2 setosa 
#  6     6          5.4         3.9          1.7         0.4 setosa 
#  7     7          4.6         3.4          1.4         0.3 setosa 
#  8     8          5           3.4          1.5         0.2 setosa 
#  9     9          4.4         2.9          1.4         0.2 setosa 
# 10    10          4.9         3.1          1.5         0.1 setosa 
# # ... with 140 more rows

iris_tbl <- iris %>%
  as_tibble() %>% print() %>% # 途中経過をprint
  rowid_to_column("id") %>% print() # 最終結果をprint
# # A tibble: 150 x 5
#    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#  1          5.1         3.5          1.4         0.2 setosa 
#  2          4.9         3            1.4         0.2 setosa 
#  3          4.7         3.2          1.3         0.2 setosa 
#  4          4.6         3.1          1.5         0.2 setosa 
#  5          5           3.6          1.4         0.2 setosa 
#  6          5.4         3.9          1.7         0.4 setosa 
#  7          4.6         3.4          1.4         0.3 setosa 
#  8          5           3.4          1.5         0.2 setosa 
#  9          4.4         2.9          1.4         0.2 setosa 
# 10          4.9         3.1          1.5         0.1 setosa 
# # ... with 140 more rows
# # A tibble: 150 x 6
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>  
#  1     1          5.1         3.5          1.4         0.2 setosa 
#  2     2          4.9         3            1.4         0.2 setosa 
#  3     3          4.7         3.2          1.3         0.2 setosa 
#  4     4          4.6         3.1          1.5         0.2 setosa 
#  5     5          5           3.6          1.4         0.2 setosa 
#  6     6          5.4         3.9          1.7         0.4 setosa 
#  7     7          4.6         3.4          1.4         0.3 setosa 
#  8     8          5           3.4          1.5         0.2 setosa 
#  9     9          4.4         2.9          1.4         0.2 setosa 
# 10    10          4.9         3.1          1.5         0.1 setosa 
# # ... with 140 more rows

anscombe

anscombe_tbl <- anscombe %>% as_tibble() %>% rowid_to_column("id")
anscombe_tbl
# # A tibble: 11 x 9
#       id    x1    x2    x3    x4    y1    y2    y3    y4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

pivot_longer()

引数colsに指定した列を縦持ちデータに変換します。

R
# Sepal.Length~Petal.Width列を縦長に
iris_tbl %>%
  pivot_longer(cols = c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"))
# # A tibble: 600 x 4
#       id Species name         value
#    <int> <fct>   <chr>        <dbl>
#  1     1 setosa  Sepal.Length   5.1
#  2     1 setosa  Sepal.Width    3.5
#  3     1 setosa  Petal.Length   1.4
#  4     1 setosa  Petal.Width    0.2
#  5     2 setosa  Sepal.Length   4.9
#  6     2 setosa  Sepal.Width    3  
#  7     2 setosa  Petal.Length   1.4
#  8     2 setosa  Petal.Width    0.2
#  9     3 setosa  Sepal.Length   4.7
# 10     3 setosa  Sepal.Width    3.2
# # ... with 590 more rows

# id,Species列以外の列(Sepal.Length~Petal.Width列)を縦長に
iris_tbl %>%
  pivot_longer(cols = -c(id, Species))
iris_tbl %>%
  pivot_longer(cols = c(-id, -Species))
iris_tbl %>%
  pivot_longer(cols = !c(id, Species))

# thで終わる列(Sepal.Length~Petal.Width列)を縦長に
iris_tbl %>%
  pivot_longer(cols = ends_with("th"))

anscombe_tbl %>%
  pivot_longer(cols = -id)
# # A tibble: 88 x 3
#       id name  value
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows

引数names_to, values_to

引数names_tovalues_toで、変換後の列名を指定できます。

R
iris_tbl %>%
  pivot_longer(cols = c(-id, -Species), names_to = "item", values_to = "val")
# # A tibble: 600 x 4
#       id Species item           val
#    <int> <fct>   <chr>        <dbl>
#  1     1 setosa  Sepal.Length   5.1
#  2     1 setosa  Sepal.Width    3.5
#  3     1 setosa  Petal.Length   1.4
#  4     1 setosa  Petal.Width    0.2
#  5     2 setosa  Sepal.Length   4.9
#  6     2 setosa  Sepal.Width    3  
#  7     2 setosa  Petal.Length   1.4
#  8     2 setosa  Petal.Width    0.2
#  9     3 setosa  Sepal.Length   4.7
# 10     3 setosa  Sepal.Width    3.2
# # ... with 590 more rows

anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val")
# # A tibble: 88 x 3
#       id name    val
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows

anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = "xn", values_to = "val")
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 3
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 x1       10
#  2     1 x2       10
#  3     1 x3       10
#  4     1 x4        8
#  5     2 x1        8
#  6     2 x2        8
#  7     2 x3        8
#  8     2 x4        8
#  9     3 x1       13
# 10     3 x2       13
# # ... with 34 more rows

引数names_prefix

縦持ちデータに変換する列名のうち、引数names_prefixに指定した文字列を接頭辞(prefix)として変換後のデータに入れないようにできます。

R
# 列x1~x4("xn")の"x"を接頭辞(prefix)として"n"部分のみ取り出す
anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = "xn", names_prefix = "x", values_to = "val")
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 3
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 1        10
#  2     1 2        10
#  3     1 3        10
#  4     1 4         8
#  5     2 1         8
#  6     2 2         8
#  7     2 3         8
#  8     2 4         8
#  9     3 1        13
# 10     3 2        13
# # ... with 34 more rows

# 接頭辞の文字列がない場合は無視
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "xn", names_prefix = "x", values_to = "val")
# # A tibble: 88 x 3
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 1     10   
#  2     1 2     10   
#  3     1 3     10   
#  4     1 4      8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 1      8   
# 10     2 2      8   
# # ... with 78 more rows

iris_tbl %>%
  select(id, starts_with("Sepal"), Species) %>% print() %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", names_prefix = "Sepal.",
               values_to = "val")
# # A tibble: 150 x 4
#       id Sepal.Length Sepal.Width Species
#    <int>        <dbl>       <dbl> <fct>  
#  1     1          5.1         3.5 setosa 
#  2     2          4.9         3   setosa 
#  3     3          4.7         3.2 setosa 
#  4     4          4.6         3.1 setosa 
#  5     5          5           3.6 setosa 
#  6     6          5.4         3.9 setosa 
#  7     7          4.6         3.4 setosa 
#  8     8          5           3.4 setosa 
#  9     9          4.4         2.9 setosa 
# 10    10          4.9         3.1 setosa 
# # ... with 140 more rows
# # A tibble: 300 x 4
#       id Species name     val
#    <int> <fct>   <chr>  <dbl>
#  1     1 setosa  Length   5.1
#  2     1 setosa  Width    3.5
#  3     2 setosa  Length   4.9
#  4     2 setosa  Width    3  
#  5     3 setosa  Length   4.7
#  6     3 setosa  Width    3.2
#  7     4 setosa  Length   4.6
#  8     4 setosa  Width    3.1
#  9     5 setosa  Length   5  
# 10     5 setosa  Width    3.6
# # ... with 290 more rows

引数names_sep

引数names_sepを指定すると、pivot_longerしてseparateするのを一度にできます。

R
# "xn"を1桁目で分ける(1Lは整数1のこと)
anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = c("x", "n"), names_sep = 1L, values_to = "val")
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 4
#       id x     n       val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1        10
#  2     1 x     2        10
#  3     1 x     3        10
#  4     1 x     4         8
#  5     2 x     1         8
#  6     2 x     2         8
#  7     2 x     3         8
#  8     2 x     4         8
#  9     3 x     1        13
# 10     3 x     2        13
# # ... with 34 more rows
# こうするのと同じ
anscombe_tbl %>%
  select(id, starts_with("x")) %>% print() %>%
  pivot_longer(cols = -id, names_to = "xn", values_to = "val") %>% print() %>%
  separate(col = xn, into = c("x", "n"), sep = 1L)
# # A tibble: 11 x 5
#       id    x1    x2    x3    x4
#    <int> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8
#  2     2     8     8     8     8
#  3     3    13    13    13     8
#  4     4     9     9     9     8
#  5     5    11    11    11     8
#  6     6    14    14    14     8
#  7     7     6     6     6     8
#  8     8     4     4     4    19
#  9     9    12    12    12     8
# 10    10     7     7     7     8
# 11    11     5     5     5     8
# # A tibble: 44 x 3
#    id    xn      val
#       id xn      val
#    <int> <chr> <dbl>
#  1     1 x1       10
#  2     1 x2       10
#  3     1 x3       10
#  4     1 x4        8
#  5     2 x1        8
#  6     2 x2        8
#  7     2 x3        8
#  8     2 x4        8
#  9     3 x1       13
# 10     3 x2       13
# # ... with 34 more rows
# # A tibble: 44 x 4
#       id x     n       val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1        10
#  2     1 x     2        10
#  3     1 x     3        10
#  4     1 x     4         8
#  5     2 x     1         8
#  6     2 x     2         8
#  7     2 x     3         8
#  8     2 x     4         8
#  9     3 x     1        13
# 10     3 x     2        13
# # ... with 34 more rows

# 軸(x,y)とセット番号(1,2,3,4)を分けてpivot
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", "set"), names_sep = 1L,
               values_to = "val")
# # A tibble: 88 x 4
#       id axis  set     val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows
# こうするのと同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val") %>%
  separate(col = name, into = c("axis", "set"), sep = 1L)

# "Sepal.Length"の"."で"Sepal"と"Length"に分ける。ただし、"."は"\\."で表す(正規表現)
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
               names_sep = "\\.", values_to = "val")
# # A tibble: 600 x 5
#       id Species part  mesure   val
#    <int> <fct>   <chr> <chr>  <dbl>
#  1     1 setosa  Sepal Length   5.1
#  2     1 setosa  Sepal Width    3.5
#  3     1 setosa  Petal Length   1.4
#  4     1 setosa  Petal Width    0.2
#  5     2 setosa  Sepal Length   4.9
#  6     2 setosa  Sepal Width    3  
#  7     2 setosa  Petal Length   1.4
#  8     2 setosa  Petal Width    0.2
#  9     3 setosa  Sepal Length   4.7
# 10     3 setosa  Sepal Width    3.2
# # ... with 590 more rows
# こうするのと同じ
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name",
               values_to = "val") %>% print() %>%
  separate(col = name, into = c("part", "mesure"), sep = "\\.")

# 6桁目で分ける
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
               names_sep = 6L, values_to = "val")
# # A tibble: 600 x 5
#       id Species part   measure   val
#    <int> <fct>   <chr>  <chr>   <dbl>
#  1     1 setosa  Sepal. Length    5.1
#  2     1 setosa  Sepal. Width     3.5
#  3     1 setosa  Petal. Length    1.4
#  4     1 setosa  Petal. Width     0.2
#  5     2 setosa  Sepal. Length    4.9
#  6     2 setosa  Sepal. Width     3  
#  7     2 setosa  Petal. Length    1.4
#  8     2 setosa  Petal. Width     0.2
#  9     3 setosa  Sepal. Length    4.7
# 10     3 setosa  Sepal. Width     3.2
# # ... with 590 more rows

注意)正規表現(ICU正規表現)について:
- http://userguide.icu-project.org/strings/regexp
- https://heavywatal.github.io/rstats/stringr.html#正規表現

引数names_pattern

引数names_sepの代わりに、引数names_patternで分け方のパターンを指定できます。

R
# 正規表現"(.)(.)"に従って、1桁の何かと1桁の何かに分ける
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", "set"),
               names_pattern = "(.)(.)", values_to = "val")
# # A tibble: 88 x 4
#       id axis  set     val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows

# 正規表現"(.+)\\.(.+)"に従って、1桁以上の何かと"."をはさんで1桁以上の何かに分ける
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"),
               names_pattern = "(.+)\\.(.+)", values_to = "val")
# # A tibble: 600 x 5
#       id Species part  measure   val
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

".value"

列名を、縦持ちに変換する部分と列名に残す部分とに分けて、変換できます。

R
# 軸とセット番号を分けてセット番号のみ縦持ちに(軸は列名に残す)
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c(".value", "set"), names_sep = 1L) 
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows
# pivot_longerして、軸とセット番号を分けて、軸だけpivot_widerで列に戻したのと同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
  pivot_wider(names_from = axis, values_from = val)
# # A tibble: 88 x 3
#       id name    val
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows
# # A tibble: 88 x 4
#       id axis  set     val
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

# names_sepの代わりにnames_patternで指定しても同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c(".value", "set"), names_pattern = "(.)(.)")

# 軸とセット番号を分けて軸のみ縦持ち(セット番号は列名に残す)
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", ".value"), names_sep = 1L)
# # A tibble: 22 x 6
#       id axis    `1`   `2`   `3`   `4`
#    <int> <chr> <dbl> <dbl> <dbl> <dbl>
#  1     1 x     10    10    10     8   
#  2     1 y      8.04  9.14  7.46  6.58
#  3     2 x      8     8     8     8   
#  4     2 y      6.95  8.14  6.77  5.76
#  5     3 x     13    13    13     8   
#  6     3 y      7.58  8.74 12.7   7.71
#  7     4 x      9     9     9     8   
#  8     4 y      8.81  8.77  7.11  8.84
#  9     5 x     11    11    11     8   
# 10     5 y      8.33  9.26  7.81  8.47
# # ... with 12 more rows
# pivot_longerして、軸とセット番号を分けて、セット番号だけpivot_widerで列に戻したのと同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
  pivot_wider(names_from = set, values_from = val)

# names_sepの代わりにnames_patternで指定しても同じ
anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", ".value"), names_pattern = "(.)(.)")

iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c(".value", "measure"),
               names_sep = "\\.")
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows
# pivot_longerして、separateして、partだけpivot_widerで列に戻したのと同じ
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("part", "measure"), sep = "\\.") %>% print() %>%
  pivot_wider(names_from = part, values_from = val)

iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", ".value"),
               names_sep = "\\.")
# # A tibble: 300 x 5
#       id Species part  Length Width
#    <int> <fct>   <chr>  <dbl> <dbl>
#  1     1 setosa  Sepal    5.1   3.5
#  2     1 setosa  Petal    1.4   0.2
#  3     2 setosa  Sepal    4.9   3  
#  4     2 setosa  Petal    1.4   0.2
#  5     3 setosa  Sepal    4.7   3.2
#  6     3 setosa  Petal    1.3   0.2
#  7     4 setosa  Sepal    4.6   3.1
#  8     4 setosa  Petal    1.5   0.2
#  9     5 setosa  Sepal    5     3.6
# 10     5 setosa  Petal    1.4   0.2
# # ... with 290 more rows
# pivot_longerして、separateして、partだけpivot_widerで列に戻したのと同じ
iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "val") %>% print() %>%
  separate(col = name, into = c("part", "measure"), sep = "\\.") %>% print() %>%
  pivot_wider(names_from = measure, values_from = val)

separate()

列をある規則的に分けます。

R
# データの準備
iris_2 <- iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = "name", values_to = "value")
iris_2
# # A tibble: 600 x 4
#       id Species name         value
#    <int> <fct>   <chr>        <dbl>
#  1     1 setosa  Sepal.Length   5.1
#  2     1 setosa  Sepal.Width    3.5
#  3     1 setosa  Petal.Length   1.4
#  4     1 setosa  Petal.Width    0.2
#  5     2 setosa  Sepal.Length   4.9
#  6     2 setosa  Sepal.Width    3  
#  7     2 setosa  Petal.Length   1.4
#  8     2 setosa  Petal.Width    0.2
#  9     3 setosa  Sepal.Length   4.7
# 10     3 setosa  Sepal.Width    3.2
# # ... with 590 more rows

# name列を"."でpart列とmeasure列に分ける
iris_2 %>%
  separate(col = name, into = c("part", "measure"), sep = "\\.")
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

# sepを指定しなくても明かなときは自動で判断
iris_2 %>%
  separate(col = name, into = c("part", "measure"))
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

# データの準備
anscombe2 <- anscombe_tbl %>%
  pivot_longer(cols = -id)
anscombe2
# # A tibble: 88 x 3
#       id name  value
#    <int> <chr> <dbl>
#  1     1 x1    10   
#  2     1 x2    10   
#  3     1 x3    10   
#  4     1 x4     8   
#  5     1 y1     8.04
#  6     1 y2     9.14
#  7     1 y3     7.46
#  8     1 y4     6.58
#  9     2 x1     8   
# 10     2 x2     8   
# # ... with 78 more rows

# name列を1桁目でaxis列とset列に分ける
anscombe2 %>%
  separate(col = name, into = c("axis", "set"), sep = 1L)
# # A tibble: 88 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows

引数convert

引数convertで分けた列の型変換も同時にできます。

R
# 整数に変換も同時に行う
anscombe2 %>%
  separate(col = name, into = c("axis", "set"), sep = 1L, convert = TRUE)
# # A tibble: 88 x 4
#       id axis    set value
#    <int> <chr> <int> <dbl>
#  1     1 x         1 10   
#  2     1 x         2 10   
#  3     1 x         3 10   
#  4     1 x         4  8   
#  5     1 y         1  8.04
#  6     1 y         2  9.14
#  7     1 y         3  7.46
#  8     1 y         4  6.58
#  9     2 x         1  8   
# 10     2 x         2  8   
# # ... with 78 more rows
# separateしてsetを整数に変換するのと同じ
anscombe2 %>%
  separate(col = name, into = c("axis", "set"), sep = 1L) %>% print() %>%
  mutate(set = as.integer(set))
# # A tibble: 88 x 4
#  id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows
# # A tibble: 88 x 4
#  id axis    set value
#    <int> <chr> <int> <dbl>
#  1     1 x         1 10   
#  2     1 x         2 10   
#  3     1 x         3 10   
#  4     1 x         4  8   
#  5     1 y         1  8.04
#  6     1 y         2  9.14
#  7     1 y         3  7.46
#  8     1 y         4  6.58
#  9     2 x         1  8   
# 10     2 x         2  8   
# # ... with 78 more rows

pivot_wider()

pivot_longerの逆の操作で、縦持ちデータを横持データに変換します。

R
# データの準備
iris_long <- iris_tbl %>%
  pivot_longer(cols = -c(id, Species), names_to = c("part", "measure"), names_sep = "\\.", values_to = "value")
iris_long
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows

iris_long %>%
  pivot_wider(names_from = part, values_from = value)
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows

iris_long %>%
  pivot_wider(names_from = measure, values_from = value)
# # A tibble: 300 x 5
#       id Species part  Length Width
#    <int> <fct>   <chr>  <dbl> <dbl>
#  1     1 setosa  Sepal    5.1   3.5
#  2     1 setosa  Petal    1.4   0.2
#  3     2 setosa  Sepal    4.9   3  
#  4     2 setosa  Petal    1.4   0.2
#  5     3 setosa  Sepal    4.7   3.2
#  6     3 setosa  Petal    1.3   0.2
#  7     4 setosa  Sepal    4.6   3.1
#  8     4 setosa  Petal    1.5   0.2
#  9     5 setosa  Sepal    5     3.6
# 10     5 setosa  Petal    1.4   0.2
# # ... with 290 more rows

# データの準備
anscombe_long <- anscombe_tbl %>%
  pivot_longer(cols = -id, names_to = c("axis", "set"), names_sep = 1L, values_to = "value")
anscombe_long
# # A tibble: 88 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     1 y     4      6.58
#  9     2 x     1      8   
# 10     2 x     2      8   
# # ... with 78 more rows

anscombe_long %>%
  pivot_wider(names_from = axis, values_from = value)
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

anscombe_long %>%
  pivot_wider(names_from = set, values_from = value)
# # A tibble: 22 x 6
#       id axis    `1`   `2`   `3`   `4`
#    <int> <chr> <dbl> <dbl> <dbl> <dbl>
#  1     1 x     10    10    10     8   
#  2     1 y      8.04  9.14  7.46  6.58
#  3     2 x      8     8     8     8   
#  4     2 y      6.95  8.14  6.77  5.76
#  5     3 x     13    13    13     8   
#  6     3 y      7.58  8.74 12.7   7.71
#  7     4 x      9     9     9     8   
#  8     4 y      8.81  8.77  7.11  8.84
#  9     5 x     11    11    11     8   
# 10     5 y      8.33  9.26  7.81  8.47
# # ... with 12 more rows

引数names_fromに複数列を指定できます。

R
iris_long %>%
  pivot_wider(names_from = c(part, measure), values_from = value)
# A tibble: 150 x 6
#       id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
#    <int> <fct>          <dbl>       <dbl>        <dbl>       <dbl>
#  1     1 setosa           5.1         3.5          1.4         0.2
#  2     2 setosa           4.9         3            1.4         0.2
#  3     3 setosa           4.7         3.2          1.3         0.2
#  4     4 setosa           4.6         3.1          1.5         0.2
#  5     5 setosa           5           3.6          1.4         0.2
#  6     6 setosa           5.4         3.9          1.7         0.4
#  7     7 setosa           4.6         3.4          1.4         0.3
#  8     8 setosa           5           3.4          1.5         0.2
#  9     9 setosa           4.4         2.9          1.4         0.2
# 10    10 setosa           4.9         3.1          1.5         0.1
# # ... with 140 more rows

# 順にpivot_widerするのと同じ
iris_long %>% print() %>%
  pivot_wider(names_from = part, values_from = value) %>% print() %>%
  pivot_wider(names_from = measure, values_from = c(Sepal, Petal))
# # A tibble: 600 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     1 setosa  Petal Length    1.4
#  4     1 setosa  Petal Width     0.2
#  5     2 setosa  Sepal Length    4.9
#  6     2 setosa  Sepal Width     3  
#  7     2 setosa  Petal Length    1.4
#  8     2 setosa  Petal Width     0.2
#  9     3 setosa  Sepal Length    4.7
# 10     3 setosa  Sepal Width     3.2
# # ... with 590 more rows
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows
# # A tibble: 150 x 6
#       id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
#    <int> <fct>          <dbl>       <dbl>        <dbl>       <dbl>
#  1     1 setosa           5.1         3.5          1.4         0.2
#  2     2 setosa           4.9         3            1.4         0.2
#  3     3 setosa           4.7         3.2          1.3         0.2
#  4     4 setosa           4.6         3.1          1.5         0.2
#  5     5 setosa           5           3.6          1.4         0.2
#  6     6 setosa           5.4         3.9          1.7         0.4
#  7     7 setosa           4.6         3.4          1.4         0.3
#  8     8 setosa           5           3.4          1.5         0.2
#  9     9 setosa           4.4         2.9          1.4         0.2
# 10    10 setosa           4.9         3.1          1.5         0.1
# # ... with 140 more rows

anscombe_long %>%
  pivot_wider(names_from = c(axis, set), values_from = value)
# # A tibble: 11 x 9
#       id   x_1   x_2   x_3   x_4   y_1   y_2   y_3   y_4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

# 順にpivot_widerするのと同じ
anscombe_long %>%
  pivot_wider(names_from = axis, values_from = value) %>% print() %>%
  pivot_wider(names_from = set, values_from = c(x, y))
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows
# # A tibble: 11 x 9
#       id   x_1   x_2   x_3   x_4   y_1   y_2   y_3   y_4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

anscombe_long %>%
  pivot_wider(names_from = c(set, axis), values_from = value)
# # A tibble: 11 x 9
#       id `1_x` `2_x` `3_x` `4_x` `1_y` `2_y` `3_y` `4_y`
#   <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

values_fromに複数列を指定できます。

R
# データの準備
anscombe_xy <- anscombe_long %>%
  pivot_wider(names_from = axis, values_from = value)
anscombe_xy
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  6.58
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  5.76
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

anscombe_xy %>%
  pivot_wider(names_from = set, values_from = c(x, y))
# # A tibble: 11 x 9
#       id   x_1   x_2   x_3   x_4   y_1   y_2   y_3   y_4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

# データの準備
iris_part <- iris_long %>%
  pivot_wider(names_from = part, values_from = value)
iris_part
# # A tibble: 300 x 5
#       id Species measure Sepal Petal
#    <int> <fct>   <chr>   <dbl> <dbl>
#  1     1 setosa  Length    5.1   1.4
#  2     1 setosa  Width     3.5   0.2
#  3     2 setosa  Length    4.9   1.4
#  4     2 setosa  Width     3     0.2
#  5     3 setosa  Length    4.7   1.3
#  6     3 setosa  Width     3.2   0.2
#  7     4 setosa  Length    4.6   1.5
#  8     4 setosa  Width     3.1   0.2
#  9     5 setosa  Length    5     1.4
# 10     5 setosa  Width     3.6   0.2
# # ... with 290 more rows

iris_part %>%
  pivot_wider(names_from = measure, values_from = c(Sepal, Petal))
# # A tibble: 150 x 6
#       id Species Sepal_Length Sepal_Width Petal_Length Petal_Width
#    <int> <fct>          <dbl>       <dbl>        <dbl>       <dbl>
#  1     1 setosa           5.1         3.5          1.4         0.2
#  2     2 setosa           4.9         3            1.4         0.2
#  3     3 setosa           4.7         3.2          1.3         0.2
#  4     4 setosa           4.6         3.1          1.5         0.2
#  5     5 setosa           5           3.6          1.4         0.2
#  6     6 setosa           5.4         3.9          1.7         0.4
#  7     7 setosa           4.6         3.4          1.4         0.3
#  8     8 setosa           5           3.4          1.5         0.2
#  9     9 setosa           4.4         2.9          1.4         0.2
# 10    10 setosa           4.9         3.1          1.5         0.1
# # ... with 140 more rows

引数names_sep

引数names_fromに複数列を指定したとき、列名の間にデフォルトでは"_"が入りますが、引数names_sepで列名の区切りを指定できます。

R
iris_long %>%
  pivot_wider(names_from = c(part, measure), names_sep = ":", values_from = value)
# # A tibble: 150 x 6
#       id Species `Sepal:Length` `Sepal:Width` `Petal:Length` `Petal:Width`
#    <int> <fct>            <dbl>         <dbl>          <dbl>         <dbl>
#  1     1 setosa             5.1           3.5            1.4           0.2
#  2     2 setosa             4.9           3              1.4           0.2
#  3     3 setosa             4.7           3.2            1.3           0.2
#  4     4 setosa             4.6           3.1            1.5           0.2
#  5     5 setosa             5             3.6            1.4           0.2
#  6     6 setosa             5.4           3.9            1.7           0.4
#  7     7 setosa             4.6           3.4            1.4           0.3
#  8     8 setosa             5             3.4            1.5           0.2
#  9     9 setosa             4.4           2.9            1.4           0.2
# 10    10 setosa             4.9           3.1            1.5           0.1
# # ... with 140 more rows

iris_long %>%
  pivot_wider(names_from = c(part, measure), names_sep = "", values_from = value)
# # A tibble: 150 x 6
#       id Species SepalLength SepalWidth PetalLength PetalWidth
#    <int> <fct>         <dbl>      <dbl>       <dbl>      <dbl>
#  1     1 setosa          5.1        3.5         1.4        0.2
#  2     2 setosa          4.9        3           1.4        0.2
#  3     3 setosa          4.7        3.2         1.3        0.2
#  4     4 setosa          4.6        3.1         1.5        0.2
#  5     5 setosa          5          3.6         1.4        0.2
#  6     6 setosa          5.4        3.9         1.7        0.4
#  7     7 setosa          4.6        3.4         1.4        0.3
#  8     8 setosa          5          3.4         1.5        0.2
#  9     9 setosa          4.4        2.9         1.4        0.2
# 10    10 setosa          4.9        3.1         1.5        0.1
# # ... with 140 more rows

anscombe_long %>%
  pivot_wider(names_from = c(axis, set), names_sep = "", values_from = value)
# # A tibble: 11 x 9
#       id    x1    x2    x3    x4    y1    y2    y3    y4
#    <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#  1     1    10    10    10     8  8.04  9.14  7.46  6.58
#  2     2     8     8     8     8  6.95  8.14  6.77  5.76
#  3     3    13    13    13     8  7.58  8.74 12.7   7.71
#  4     4     9     9     9     8  8.81  8.77  7.11  8.84
#  5     5    11    11    11     8  8.33  9.26  7.81  8.47
#  6     6    14    14    14     8  9.96  8.1   8.84  7.04
#  7     7     6     6     6     8  7.24  6.13  6.08  5.25
#  8     8     4     4     4    19  4.26  3.1   5.39 12.5 
#  9     9    12    12    12     8 10.8   9.13  8.15  5.56
# 10    10     7     7     7     8  4.82  7.26  6.42  7.91
# 11    11     5     5     5     8  5.68  4.74  5.73  6.89

引数names_prefix

引数names_prefixで横持ちデータに変換後の列名に接頭辞(prefix)を付けられます。

R
iris_long %>%
  pivot_wider(names_from = part, names_prefix = "part_", values_from = value)
# # A tibble: 300 x 5
#       id Species measure part_Sepal part_Petal
#    <int> <fct>   <chr>        <dbl>      <dbl>
#  1     1 setosa  Length         5.1        1.4
#  2     1 setosa  Width          3.5        0.2
#  3     2 setosa  Length         4.9        1.4
#  4     2 setosa  Width          3          0.2
#  5     3 setosa  Length         4.7        1.3
#  6     3 setosa  Width          3.2        0.2
#  7     4 setosa  Length         4.6        1.5
#  8     4 setosa  Width          3.1        0.2
#  9     5 setosa  Length         5          1.4
# 10     5 setosa  Width          3.6        0.2
# # ... with 290 more rows

iris_long %>% 
  filter(part == "Sepal") %>% print() %>%
  pivot_wider(names_from = measure, names_prefix = "Sepal_", values_from = value)
# # A tibble: 300 x 5
#       id Species part  measure value
#    <int> <fct>   <chr> <chr>   <dbl>
#  1     1 setosa  Sepal Length    5.1
#  2     1 setosa  Sepal Width     3.5
#  3     2 setosa  Sepal Length    4.9
#  4     2 setosa  Sepal Width     3  
#  5     3 setosa  Sepal Length    4.7
#  6     3 setosa  Sepal Width     3.2
#  7     4 setosa  Sepal Length    4.6
#  8     4 setosa  Sepal Width     3.1
#  9     5 setosa  Sepal Length    5  
# 10     5 setosa  Sepal Width     3.6
# # ... with 290 more rows
# # A tibble: 150 x 5
#       id Species part  Sepal_Length Sepal_Width
#    <int> <fct>   <chr>        <dbl>       <dbl>
#  1     1 setosa  Sepal          5.1         3.5
#  2     2 setosa  Sepal          4.9         3  
#  3     3 setosa  Sepal          4.7         3.2
#  4     4 setosa  Sepal          4.6         3.1
#  5     5 setosa  Sepal          5           3.6
#  6     6 setosa  Sepal          5.4         3.9
#  7     7 setosa  Sepal          4.6         3.4
#  8     8 setosa  Sepal          5           3.4
#  9     9 setosa  Sepal          4.4         2.9
# 10    10 setosa  Sepal          4.9         3.1
# # ... with 140 more rows

iris_long %>% 
  filter(part == "Sepal") %>%
  pivot_wider(names_from = measure, names_prefix = "Sepal.", values_from = value)
# # A tibble: 150 x 5
#       id Species part  Sepal.Length Sepal.Width
#    <int> <fct>   <chr>        <dbl>       <dbl>
#  1     1 setosa  Sepal          5.1         3.5
#  2     2 setosa  Sepal          4.9         3  
#  3     3 setosa  Sepal          4.7         3.2
#  4     4 setosa  Sepal          4.6         3.1
#  5     5 setosa  Sepal          5           3.6
#  6     6 setosa  Sepal          5.4         3.9
#  7     7 setosa  Sepal          4.6         3.4
#  8     8 setosa  Sepal          5           3.4
#  9     9 setosa  Sepal          4.4         2.9
# 10    10 setosa  Sepal          4.9         3.1
# # ... with 140 more rows

anscombe_long %>%
  filter(axis == "x") %>% print() %>%
  pivot_wider(names_from = set, names_prefix = "x_", values_from = value)
# # A tibble: 44 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1        10
#  2     1 x     2        10
#  3     1 x     3        10
#  4     1 x     4         8
#  5     2 x     1         8
#  6     2 x     2         8
#  7     2 x     3         8
#  8     2 x     4         8
#  9     3 x     1        13
# 10     3 x     2        13
# # ... with 34 more rows
# # A tibble: 11 x 6
#       id axis    x_1   x_2   x_3   x_4
#    <int> <chr> <dbl> <dbl> <dbl> <dbl>
#  1     1 x        10    10    10     8
#  2     2 x         8     8     8     8
#  3     3 x        13    13    13     8
#  4     4 x         9     9     9     8
#  5     5 x        11    11    11     8
#  6     6 x        14    14    14     8
#  7     7 x         6     6     6     8
#  8     8 x         4     4     4    19
#  9     9 x        12    12    12     8
# 10    10 x         7     7     7     8
# 11    11 x         5     5     5     8

引数values_fill

縦持ちデータから横持ちデータに変換した後のデータにNAができるとき引数values_fillNAを変換できます。リストの形で指定します。

R
# NAを含むデータを準備
anscombe_long %>%
  filter(!(axis == "y" & set == "4")) %>% print() %>% # y4のみ除く
  pivot_wider(names_from = axis, values_from = value) # y4がないので、NAができる
# # A tibble: 77 x 4
#       id axis  set   value
#    <int> <chr> <chr> <dbl>
#  1     1 x     1     10   
#  2     1 x     2     10   
#  3     1 x     3     10   
#  4     1 x     4      8   
#  5     1 y     1      8.04
#  6     1 y     2      9.14
#  7     1 y     3      7.46
#  8     2 x     1      8   
#  9     2 x     2      8   
# 10     2 x     3      8   
# # ... with 67 more rows
# # A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8 NA   
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8 NA   
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows
# このNAを0で埋める
anscombe_long %>%
  filter(!(axis == "y" & set == "4")) %>% print() %>% # y4のみ除く
  pivot_wider(names_from = axis, values_from = value, values_fill = list(value = 0)) # NAを0で埋める
# A tibble: 44 x 4
#       id set       x     y
#    <int> <chr> <dbl> <dbl>
#  1     1 1        10  8.04
#  2     1 2        10  9.14
#  3     1 3        10  7.46
#  4     1 4         8  0   
#  5     2 1         8  6.95
#  6     2 2         8  8.14
#  7     2 3         8  6.77
#  8     2 4         8  0   
#  9     3 1        13  7.58
# 10     3 2        13  8.74
# # ... with 34 more rows

# NAを含むデータを準備
anscombe_tbl %>%
  select(x1, y1) %>% print() %>%
  pivot_wider(names_from = y1, values_from = y1)
# # A tibble: 11 x 2
#       x1    y1
#    <dbl> <dbl>
#  1    10  8.04
#  2     8  6.95
#  3    13  7.58
#  4     9  8.81
#  5    11  8.33
#  6    14  9.96
#  7     6  7.24
#  8     4  4.26
#  9    12 10.8 
# 10     7  4.82
# 11     5  5.68
# # A tibble: 11 x 12
#       x1 `8.04` `6.95` `7.58` `8.81` `8.33` `9.96` `7.24` `4.26` `10.84` `4.82` `5.68`
#    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1    10   8.04  NA     NA     NA     NA     NA     NA     NA       NA    NA     NA   
#  2     8  NA      6.95  NA     NA     NA     NA     NA     NA       NA    NA     NA   
#  3    13  NA     NA      7.58  NA     NA     NA     NA     NA       NA    NA     NA   
#  4     9  NA     NA     NA      8.81  NA     NA     NA     NA       NA    NA     NA   
#  5    11  NA     NA     NA     NA      8.33  NA     NA     NA       NA    NA     NA   
#  6    14  NA     NA     NA     NA     NA      9.96  NA     NA       NA    NA     NA   
#  7     6  NA     NA     NA     NA     NA     NA      7.24  NA       NA    NA     NA   
#  8     4  NA     NA     NA     NA     NA     NA     NA      4.26    NA    NA     NA   
#  9    12  NA     NA     NA     NA     NA     NA     NA     NA       10.8  NA     NA   
# 10     7  NA     NA     NA     NA     NA     NA     NA     NA       NA     4.82  NA   
# 11     5  NA     NA     NA     NA     NA     NA     NA     NA       NA    NA      5.68
# このNAを0で埋める
anscombe_tbl %>%
  select(x1, y1) %>% print() %>%
  pivot_wider(names_from = y1, values_from = y1, values_fill = list(y1 = 0))
# # A tibble: 11 x 12
#       x1 `8.04` `6.95` `7.58` `8.81` `8.33` `9.96` `7.24` `4.26` `10.84` `4.82` `5.68`
#    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>  <dbl>
#  1    10   8.04   0      0      0      0      0      0      0        0     0      0   
#  2     8   0      6.95   0      0      0      0      0      0        0     0      0   
#  3    13   0      0      7.58   0      0      0      0      0        0     0      0   
#  4     9   0      0      0      8.81   0      0      0      0        0     0      0   
#  5    11   0      0      0      0      8.33   0      0      0        0     0      0   
#  6    14   0      0      0      0      0      9.96   0      0        0     0      0   
#  7     6   0      0      0      0      0      0      7.24   0        0     0      0   
#  8     4   0      0      0      0      0      0      0      4.26     0     0      0   
#  9    12   0      0      0      0      0      0      0      0       10.8   0      0   
# 10     7   0      0      0      0      0      0      0      0        0     4.82   0   
# 11     5   0      0      0      0      0      0      0      0        0     0      5.68

# NAを含むデータを準備
iris_tbl %>%
  mutate(Len = floor(Sepal.Length), Wid = floor(Sepal.Width)) %>% print() %>%
  count(Len, Wid, name = "count") %>% print() %>%
  pivot_wider(names_from = Wid, values_from = count) # LenとWidのクロス表(NAがある)
# # A tibble: 150 x 8
#      id Sepal.Length Sepal.Width Petal.Length Petal.Width Species   Len   Wid
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <dbl> <dbl>
#  1     1          5.1         3.5          1.4         0.2 setosa      5     3
#  2     2          4.9         3            1.4         0.2 setosa      4     3
#  3     3          4.7         3.2          1.3         0.2 setosa      4     3
#  4     4          4.6         3.1          1.5         0.2 setosa      4     3
#  5     5          5           3.6          1.4         0.2 setosa      5     3
#  6     6          5.4         3.9          1.7         0.4 setosa      5     3
#  7     7          4.6         3.4          1.4         0.3 setosa      4     3
#  8     8          5           3.4          1.5         0.2 setosa      5     3
#  9     9          4.4         2.9          1.4         0.2 setosa      4     2
# 10    10          4.9         3.1          1.5         0.1 setosa      4     3
# # ... with 140 more rows
# # A tibble: 9 x 3
#     Len   Wid count
#   <dbl> <dbl> <int>
# 1     4     2     4
# 2     4     3    18
# 3     5     2    24
# 4     5     3    33
# 5     5     4     4
# 6     6     2    25
# 7     6     3    29
# 8     7     2     4
# 9     7     3     9
# # A tibble: 4 x 4
#     Len   `2`   `3`   `4`
#   <dbl> <int> <int> <int>
# 1     4     4    18    NA
# 2     5    24    33     4
# 3     6    25    29    NA
# 4     7     4     9    NA
# このNAを0で埋める
iris_tbl %>%
  mutate(Len = floor(Sepal.Length), Wid = floor(Sepal.Width)) %>% print() %>%
  select(Len, Wid) %>%
  count(Len, Wid, name = "count") %>%
  pivot_wider(names_from = Wid, values_from = count, values_fill = list(count = 0))
# # A tibble: 4 x 4
#     Len   `2`   `3`   `4`
#   <dbl> <int> <int> <int>
# 1     4     4    18     0
# 2     5    24    33     4
# 3     6    25    29     0
# 4     7     4     9     0

# NAを含むデータを準備
iris_tbl %>%
  group_by(Species, Sepal.Length) %>%
  count(name = "count") %>% print() %>%
  pivot_wider(names_from = Species, values_from = count) # NAになるところがある
# # A tibble: 57 x 3
# # Groups:   Species, Sepal.Length [57]
#    Species Sepal.Length count
#    <fct>          <dbl> <int>
#  1 setosa           4.3     1
#  2 setosa           4.4     3
#  3 setosa           4.5     1
#  4 setosa           4.6     4
#  5 setosa           4.7     2
#  6 setosa           4.8     5
#  7 setosa           4.9     4
#  8 setosa           5       8
#  9 setosa           5.1     8
# 10 setosa           5.2     3
# # ... with 47 more rows
# # A tibble: 35 x 4
# # Groups:   Sepal.Length [35]
#    Sepal.Length setosa versicolor virginica
#           <dbl>  <int>      <int>     <int>
#  1          4.3      1         NA        NA
#  2          4.4      3         NA        NA
#  3          4.5      1         NA        NA
#  4          4.6      4         NA        NA
#  5          4.7      2         NA        NA
#  6          4.8      5         NA        NA
#  7          4.9      4          1         1
#  8          5        8          2        NA
#  9          5.1      8          1        NA
# 10          5.2      3          1        NA
# # ... with 25 more rows
# このNAを0で埋める
iris_tbl %>%
  group_by(Species, Sepal.Length) %>%
  count(name = "count") %>% print() %>%
  pivot_wider(names_from = Species, values_from = count,
              values_fill = list(count = 0))
# # A tibble: 35 x 4
# # Groups:   Sepal.Length [35]
#    Sepal.Length setosa versicolor virginica
#           <dbl>  <int>      <int>     <int>
#  1          4.3      1          0         0
#  2          4.4      3          0         0
#  3          4.5      1          0         0
#  4          4.6      4          0         0
#  5          4.7      2          0         0
#  6          4.8      5          0         0
#  7          4.9      4          1         1
#  8          5        8          2         0
#  9          5.1      8          1         0
# 10          5.2      3          1         0
# # ... with 25 more rows

これを用いると、カテゴリー変数の列を0,1の2値変数に変換できます。

R
iris_tbl %>%
  mutate(value = 1) %>% print() %>%
  pivot_wider(names_from = Species, values_from = value)
# # A tibble: 150 x 7
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width Species value
#    <int>        <dbl>       <dbl>        <dbl>       <dbl> <fct>   <dbl>
#  1     1          5.1         3.5          1.4         0.2 setosa      1
#  2     2          4.9         3            1.4         0.2 setosa      1
#  3     3          4.7         3.2          1.3         0.2 setosa      1
#  4     4          4.6         3.1          1.5         0.2 setosa      1
#  5     5          5           3.6          1.4         0.2 setosa      1
#  6     6          5.4         3.9          1.7         0.4 setosa      1
#  7     7          4.6         3.4          1.4         0.3 setosa      1
#  8     8          5           3.4          1.5         0.2 setosa      1
#  9     9          4.4         2.9          1.4         0.2 setosa      1
# 10    10          4.9         3.1          1.5         0.1 setosa      1
# # ... with 140 more rows
# # A tibble: 150 x 8
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
#    <int>        <dbl>       <dbl>        <dbl>       <dbl>  <dbl>      <dbl>     <dbl>
#  1     1          5.1         3.5          1.4         0.2      1         NA        NA
#  2     2          4.9         3            1.4         0.2      1         NA        NA
#  3     3          4.7         3.2          1.3         0.2      1         NA        NA
#  4     4          4.6         3.1          1.5         0.2      1         NA        NA
#  5     5          5           3.6          1.4         0.2      1         NA        NA
#  6     6          5.4         3.9          1.7         0.4      1         NA        NA
#  7     7          4.6         3.4          1.4         0.3      1         NA        NA
#  8     8          5           3.4          1.5         0.2      1         NA        NA
#  9     9          4.4         2.9          1.4         0.2      1         NA        NA
# 10    10          4.9         3.1          1.5         0.1      1         NA        NA
# # ... with 140 more rows
iris_tbl %>%
  mutate(value = 1) %>% print() %>%
  pivot_wider(names_from = Species, values_from = value,
              values_fill = list(value = 0))
# # A tibble: 150 x 8
#       id Sepal.Length Sepal.Width Petal.Length Petal.Width setosa versicolor virginica
#    <int>        <dbl>       <dbl>        <dbl>       <dbl>  <dbl>      <dbl>     <dbl>
#  1     1          5.1         3.5          1.4         0.2      1          0         0
#  2     2          4.9         3            1.4         0.2      1          0         0
#  3     3          4.7         3.2          1.3         0.2      1          0         0
#  4     4          4.6         3.1          1.5         0.2      1          0         0
#  5     5          5           3.6          1.4         0.2      1          0         0
#  6     6          5.4         3.9          1.7         0.4      1          0         0
#  7     7          4.6         3.4          1.4         0.3      1          0         0
#  8     8          5           3.4          1.5         0.2      1          0         0
#  9     9          4.4         2.9          1.4         0.2      1          0         0
# 10    10          4.9         3.1          1.5         0.1      1          0         0
# # ... with 140 more rows
# Species列を0,1の2値変数にできる

引数values_fn

引数values_fnを使うとエクセルのピボット集計のような集計も同時にできます。

R
# ピボット集計で合計
iris_long %>%
  pivot_wider(id_cols = c(part, measure),
              names_from = Species, values_from = value,
              values_fn = list(value = sum))
# # A tibble: 4 x 5
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Sepal Length   250.       297.       329.
# 2 Sepal Width    171.       138.       149.
# 3 Petal Length    73.1      213        278.
# 4 Petal Width     12.3       66.3      101.
# グループ化して集計してpivot_widerするのと同じ(順序は異なるが)
iris_long %>%
  group_by(Species, part, measure) %>%
  summarise(Sum = sum(value)) %>% print() %>%
  pivot_wider(names_from = Species, values_from = Sum)
# # A tibble: 12 x 4
# # Groups:   Species, part [6]
#    Species    part  measure   Sum
#    <fct>      <chr> <chr>   <dbl>
#  1 setosa     Petal Length   73.1
#  2 setosa     Petal Width    12.3
#  3 setosa     Sepal Length  250. 
#  4 setosa     Sepal Width   171. 
#  5 versicolor Petal Length  213  
#  6 versicolor Petal Width    66.3
#  7 versicolor Sepal Length  297. 
#  8 versicolor Sepal Width   138. 
#  9 virginica  Petal Length  278. 
# 10 virginica  Petal Width   101. 
# 11 virginica  Sepal Length  329. 
# 12 virginica  Sepal Width   149. 
# # A tibble: 4 x 5
# # Groups:   part [2]
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Petal Length    73.1      213        278.
# 2 Petal Width     12.3       66.3      101.
# 3 Sepal Length   250.       297.       329.
# 4 Sepal Width    171.       138.       149.

# ピボット集計で平均
iris_long %>%
  pivot_wider(id_cols = c(part,measure),
              names_from = Species, values_from = value,
              values_fn = list(value = mean))
# # A tibble: 4 x 5
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Sepal Length   5.01        5.94      6.59
# 2 Sepal Width    3.43        2.77      2.97
# 3 Petal Length   1.46        4.26      5.55
# 4 Petal Width    0.246       1.33      2.03
# グループ化して集計してpivot_widerするのと同じ(順序は異なるが)
iris_long %>%
  group_by(Species, part, measure) %>%
  summarise(Mean = mean(value)) %>% print() %>%
  pivot_wider(names_from = Species, values_from = Mean)
# # A tibble: 12 x 4
# # Groups:   Species, part [6]
#    Species    part  measure  Mean
#    <fct>      <chr> <chr>   <dbl>
#  1 setosa     Petal Length  1.46 
#  2 setosa     Petal Width   0.246
#  3 setosa     Sepal Length  5.01 
#  4 setosa     Sepal Width   3.43 
#  5 versicolor Petal Length  4.26 
#  6 versicolor Petal Width   1.33 
#  7 versicolor Sepal Length  5.94 
#  8 versicolor Sepal Width   2.77 
#  9 virginica  Petal Length  5.55 
# 10 virginica  Petal Width   2.03 
# 11 virginica  Sepal Length  6.59 
# 12 virginica  Sepal Width   2.97 
# # A tibble: 4 x 5
# # Groups:   part [2]
#   part  measure setosa versicolor virginica
#   <chr> <chr>    <dbl>      <dbl>     <dbl>
# 1 Petal Length   1.46        4.26      5.55
# 2 Petal Width    0.246       1.33      2.03
# 3 Sepal Length   5.01        5.94      6.59
# 4 Sepal Width    3.43        2.77      2.97

参考