dplyr_demo
Szupei Wang
2017/07/31
NBA = read.csv("C:/Users/szwang/Downloads/NBADATA.csv")
colnames(NBA)
[1] "X" "Players" "MP" "FG" "FGA" "FG_P"
[7] "X3P" "X3PA" "X3P_P" "FT" "FTA" "FT_P"
[13] "ORB" "DRB" "TRB" "AST" "STL" "BLK"
[19] "TOV" "PF" "PTS" "dates" "match" "team"
[25] "vs" "season" "position"
Filter rows
library(dplyr)
NBA_15_16 = NBA %>%
filter(season == '2015-2016')
NBA_14_16 = NBA %>%
filter(season == '2015-2016' | season == '2014-2015')
NBA_15_16_GW = NBA %>%
filter(season == '2015-2016' & team == 'Golden State Warriors')
Add new columns
NBA_throw = NBA %>%
mutate(throw = FGA + X3PA)
head(mutate(NBA,throw = FGA + X3PA),1)
X Players MP FG FGA FG_P X3P X3PA X3P_P FT FTA FT_P ORB
1 1 Michael Jordan 43:00:00 10 22 0.455 0 5 0 10 13 0.769 1
DRB TRB AST STL BLK TOV PF PTS dates match team
1 3 4 3 0 0 1 2 30 1996/11/1 home Chicago Bulls
vs season position throw
1 Boston Celtics 1996-1997 Starter 27
NBA_season = iris %>%
arrange(Sepal.Length)
head(arrange(iris,Sepal.Length))
Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1 4.3 3.0 1.1 0.1 setosa
2 4.4 2.9 1.4 0.2 setosa
3 4.4 3.0 1.3 0.2 setosa
4 4.4 3.2 1.3 0.2 setosa
5 4.5 2.3 1.3 0.3 setosa
6 4.6 3.1 1.5 0.2 setosa
Select columns
NBA_select = NBA %>%
select(season,team,PTS)
head(select(NBA,season,team,PTS))
season team PTS
1 1996-1997 Chicago Bulls 30
2 1996-1997 Chicago Bulls 18
3 1996-1997 Chicago Bulls 13
4 1996-1997 Chicago Bulls 7
5 1996-1997 Chicago Bulls 4
6 1996-1997 Chicago Bulls 20
Summarise values
NBA_summary = NBA %>%
summarise(mean(PTS),mean(TOV))
summarise(NBA,sum(PTS),mean(TOV))
sum(PTS) mean(TOV)
1 4606223 1.378494
NBA_16_GW = NBA %>%
filter(season == '2015-2016' & team == 'Golden State Warriors') %>%
mutate(throw = FGA + X3PA) %>%
select(Players,team,match,vs,PTS)
head(NBA_16_GW)
Players team match vs PTS
1 Stephen Curry Golden State Warriors away New Orleans Pelicans 40
2 Harrison Barnes Golden State Warriors away New Orleans Pelicans 8
3 Draymond Green Golden State Warriors away New Orleans Pelicans 10
4 Klay Thompson Golden State Warriors away New Orleans Pelicans 9
5 Andrew Bogut Golden State Warriors away New Orleans Pelicans 12
6 Andre Iguodala Golden State Warriors away New Orleans Pelicans 2
NBA %>%
filter(season == '2015-2016' & team == 'Golden State Warriors') %>%
mutate(throw = FGA + X3PA) %>%
select(Players,team,match,vs,PTS,throw) %>%
group_by(vs,match) %>%
summarise(value=sum(PTS))
# A tibble: 58 x 3
# Groups: vs [?]
vs match value
<fctr> <fctr> <int>
1 Atlanta Hawks away 109
2 Atlanta Hawks home 102
3 Boston Celtics away 106
4 Boston Celtics home 124
5 Brooklyn Nets away 107
6 Brooklyn Nets home 114
7 Charlotte Hornets away 111
8 Charlotte Hornets home 116
9 Chicago Bulls away 106
10 Chicago Bulls home 125
# ... with 48 more rows
NBA %>%
filter(season == '2015-2016') %>%
group_by(team,vs,dates,match) %>%
select(team,vs,match,PTS) %>%
summarise(sum(PTS)) %>%
distinct()
# A tibble: 2,460 x 5
# Groups: team, vs, dates [2,460]
team vs dates match `sum(PTS)`
<fctr> <fctr> <fctr> <fctr> <int>
1 Atlanta Hawks Boston Celtics 2015/11/13 home 93
2 Atlanta Hawks Boston Celtics 2015/11/24 away 121
3 Atlanta Hawks Boston Celtics 2015/12/18 home 109
4 Atlanta Hawks Boston Celtics 2016/4/9 away 118
5 Atlanta Hawks Brooklyn Nets 2015/11/17 home 88
6 Atlanta Hawks Brooklyn Nets 2015/11/4 away 101
7 Atlanta Hawks Brooklyn Nets 2016/1/16 away 114
8 Atlanta Hawks Charlotte Hornets 2015/10/30 away 97
9 Atlanta Hawks Charlotte Hornets 2015/11/1 home 94
10 Atlanta Hawks Charlotte Hornets 2016/1/13 home 84
# ... with 2,450 more rows
library(ggplot2)
NBA %>%
filter(season == '2015-2016') %>%
group_by(team) %>%
summarise(value=sum(PTS)) %>%
ggplot(aes(x = team ,y = value)) +
geom_bar(stat = "identity")
inner_join
A = data.frame(name=c('A','B','C'),value1=c(1,2,3))
B = data.frame(name=c('A','D','B'),value2=c(9,8,7))
A %>% inner_join(B,by='name')
name value1 value2
1 A 1 9
2 B 2 7
left_join
A = data.frame(name=c('A','B','C'),value1=c(1,2,3))
B = data.frame(name=c('A','D','B'),value2=c(9,8,7))
A %>% left_join(B,by='name')
name value1 value2
1 A 1 9
2 B 2 7
3 C 3 NA
right_join
A = data.frame(name=c('A','B','C'),value1=c(1,2,3))
B = data.frame(name=c('A','D','B'),value2=c(9,8,7))
A %>% right_join(B,by='name')
name value1 value2
1 A 1 9
2 D NA 8
3 B 2 7
full_join
A = data.frame(name=c('A','B','C'),value1=c(1,2,3))
B = data.frame(name=c('A','D','B'),value2=c(9,8,7))
A %>% full_join(B,by='name')
name value1 value2
1 A 1 9
2 B 2 7
3 C 3 NA
4 D NA 8