Ever read a post and went damn! I really wonder how that would work for AFL .
Well that was me a couple of weeks ago reading this fantastic post and as most people know a good post is a fantastic post when it is reproducible.
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.3.0
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(rjags)
## Loading required package: coda
## Linked to JAGS 4.3.0
## Loaded modules: basemod,bugs
library(gsheet)
library(stringr)
library(knitr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
logit <- function(p) {
out <- log(p/(1 - p))
return(out)
}
url<-"https://docs.google.com/spreadsheets/d/1U95IzGYJGOzQgLZVmUk6PPsGPLJoyJ2-Ry2dEY3sU3A/edit?usp=sharing"
afl_bookies<-read.csv(text=gsheet2text(url, format='csv'), stringsAsFactors=FALSE)
## No encoding supplied: defaulting to UTF-8.
names(afl_bookies)
## [1] "date" "Kick.Off..local."
## [3] "Home.Team" "Away.Team"
## [5] "Venue" "Home.Score"
## [7] "Away.Score" "Play.Off.Game."
## [9] "Home.Goals" "Home.Behinds"
## [11] "Away.Goals" "Away.Behinds"
## [13] "Home.Odds" "Away.Odds"
## [15] "Bookmakers.Surveyed" "Home.Odds.Open"
## [17] "Home.Odds.Min" "Home.Odds.Max"
## [19] "Home.Odds.Close" "Away.Odds.Open"
## [21] "Away.Odds.Min" "Away.Odds.Max"
## [23] "Away.Odds.Close" "Home.Line.Open"
## [25] "Home.Line.Min" "Home.Line.Max"
## [27] "Home.Line.Close" "Away.Line.Open"
## [29] "Away.Line.Min" "Away.Line.Max"
## [31] "Away.Line.Close" "Home.Line.Odds.Open"
## [33] "Home.Line.Odds.Min" "Home.Line.Odds.Max"
## [35] "Home.Line.Odds.Close" "Away.Line.Odds.Open"
## [37] "Away.Line.Odds.Min" "Away.Line.Odds.Max"
## [39] "Away.Line.Odds.Close" "Total.Score.Open"
## [41] "Total.Score.Min" "Total.Score.Max"
## [43] "Total.Score.Close" "Total.Score.Over.Open"
## [45] "Total.Score.Over.Min" "Total.Score.Over.Max"
## [47] "Total.Score.Over.Close" "Total.Score.Under.Open"
## [49] "Total.Score.Under.Min" "Total.Score.Under.Max"
## [51] "Total.Score.Under.Close"
colnames(afl_bookies)[1] <- "date"
afl_bookies$date<-dmy(afl_bookies$date)
afl_bookies$overround<-(1/afl_bookies$Home.Odds.Close) + (1/afl_bookies$Away.Odds.Close)
summary(afl_bookies$overround)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.012 1.024 1.026 1.027 1.028 1.063 681
plot(afl_bookies$overround)
plot(afl_bookies$overround-1)
qplot(afl_bookies$overround,geom="histogram", bins=100)
## Warning: Removed 681 rows containing non-finite values (stat_bin).
afl_bookies$overround<-afl_bookies$overround-1
afl_bookies$true.home.prob<-1/((afl_bookies$Home.Odds.Close*afl_bookies$overround)+afl_bookies$Home.Odds.Close)
afl_bookies$true.away.prob<-1/((afl_bookies$Away.Odds.Close*afl_bookies$overround)+afl_bookies$Away.Odds.Close)
qplot((afl_bookies$true.home.prob+afl_bookies$true.away.prob),geom="histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 681 rows containing non-finite values (stat_bin).
min.day <- min(afl_bookies$date)
afl_bookies <- afl_bookies %>%
mutate(day = date - min.day, week = as.numeric(floor(day/7) + 1))
tab.out <- head(afl_bookies, 4) %>% select(date, Home.Team, Away.Team, true.home.prob)
kable(tab.out)
date | Home.Team | Away.Team | true.home.prob |
---|---|---|---|
2018-04-22 | Brisbane | Gold Coast | 0.5454545 |
2018-04-22 | North Melbourne | Hawthorn | 0.3811881 |
2018-04-21 | Fremantle | Western Bulldogs | 0.6030151 |
2018-04-21 | Port Adelaide | Geelong | 0.6474820 |
y <- logit(afl_bookies$true.home.prob)
w <- afl_bookies$week
w
## [1] 462 462 462 462 462 462 462 461 461 461 461 461 461 461 461 461 460
## [18] 460 460 460 460 460 460 460 460 459 459 459 459 459 459 459 459 458
## [35] 458 458 458 458 458 458 458 458 457 433 432 432 431 431 430 430 430
## [52] 429 428 428 428 428 428 428 428 428 428 427 427 427 427 427 427 427
## [69] 427 427 426 426 426 426 426 426 426 426 426 425 425 425 425 425 425
## [86] 425 425 425 424 424 424 424 424 424 424 424 424 423 423 423 423 423
## [103] 423 423 423 423 422 422 422 422 422 422 422 422 422 421 421 421 421
## [120] 421 421 421 421 421 420 420 420 420 420 420 420 420 420 419 419 419
## [137] 419 419 419 419 419 418 418 418 418 418 418 417 417 417 417 417 417
## [154] 417 416 416 416 416 416 416 415 415 415 415 415 415 415 415 415 414
## [171] 414 414 414 414 414 414 414 414 413 413 413 413 413 413 413 413 413
## [188] 412 412 412 412 412 412 412 412 412 411 411 411 411 411 411 411 411
## [205] 411 410 410 410 410 410 410 410 410 410 409 409 409 409 409 409 409
## [222] 409 408 408 408 408 408 408 408 408 408 408 407 407 407 407 407 407
## [239] 407 407 406 406 406 406 406 406 406 406 406 405 381 380 380 379 379
## [256] 378 378 378 377 376 376 376 376 376 376 376 376 376 375 375 375 375
## [273] 375 375 375 375 375 374 374 374 374 374 374 374 374 374 373 373 373
## [290] 373 373 373 373 373 373 372 372 372 372 372 372 372 372 372 371 371
## [307] 371 371 371 371 371 371 371 370 370 370 370 370 370 370 370 369 369
## [324] 369 369 369 369 369 369 369 368 368 368 368 368 368 367 367 367 367
## [341] 367 367 366 366 366 366 366 366 366 365 365 365 365 365 365 365 365
## [358] 365 364 364 364 364 364 364 364 364 364 363 363 363 363 363 363 363
## [375] 363 363 362 362 362 362 362 362 362 362 362 361 361 361 361 361 361
## [392] 361 361 361 360 360 360 360 360 360 360 360 360 359 359 359 359 359
## [409] 359 359 359 359 358 358 358 358 358 358 358 358 358 357 357 357 357
## [426] 357 357 357 357 357 356 356 356 356 356 356 356 356 356 355 355 355
## [443] 355 355 355 355 355 355 354 354 354 354 354 354 354 354 353 329 328
## [460] 328 327 327 326 326 326 326 325 325 325 325 325 325 325 325 325 324
## [477] 324 324 324 324 324 324 324 324 323 323 323 323 323 323 323 323 323
## [494] 322 322 322 322 322 322 322 322 322 321 321 321 321 321 321 321 321
## [511] 321 320 320 320 320 320 320 320 320 320 319 319 319 319 319 319 319
## [528] 319 319 318 318 318 318 318 318 318 318 318 317 317 317 317 317 317
## [545] 317 317 316 316 316 316 316 316 316 316 315 315 315 315 315 315 314
## [562] 314 314 314 314 314 313 313 313 313 313 313 313 312 312 312 312 312
## [579] 312 312 312 312 311 311 311 311 311 311 311 311 311 310 310 310 310
## [596] 310 310 310 310 310 309 309 309 309 309 309 309 309 309 308 308 308
## [613] 308 308 308 308 308 308 307 307 307 307 307 307 307 307 307 306 306
## [630] 306 306 306 306 306 306 306 305 305 305 305 305 305 305 305 305 304
## [647] 304 304 304 304 304 304 304 304 303 303 303 303 303 303 303 303 302
## [664] 276 275 275 274 274 273 273 273 273 272 272 272 272 272 272 272 272
## [681] 272 271 271 271 271 271 271 271 271 271 270 270 270 270 270 270 270
## [698] 270 270 269 269 269 269 269 269 269 269 269 268 268 268 268 268 268
## [715] 268 268 267 267 267 267 267 266 266 266 266 266 265 265 265 265 265
## [732] 265 265 265 265 264 264 264 264 264 264 264 264 264 263 263 263 263
## [749] 263 263 263 263 263 262 262 262 262 262 262 262 262 262 261 261 261
## [766] 261 261 261 261 261 261 260 260 260 260 260 260 260 260 260 259 259
## [783] 259 259 259 259 259 259 258 258 258 258 258 258 258 257 257 257 257
## [800] 257 256 256 256 256 256 256 256 255 255 255 255 255 255 255 255 255
## [817] 254 254 254 254 254 254 254 254 254 253 253 253 253 253 253 253 253
## [834] 252 252 252 252 252 252 252 252 252 252 251 251 251 251 251 251 251
## [851] 251 251 250 250 250 250 250 250 250 250 249 249 249 249 249 248 248
## [868] 248 248 248 224 223 223 222 222 221 221 221 221 220 220 220 220 220
## [885] 220 220 220 220 219 219 219 219 219 219 219 219 219 218 218 218 218
## [902] 218 218 218 218 218 217 217 217 217 217 217 217 217 217 216 216 216
## [919] 216 216 216 216 216 216 215 215 215 215 215 215 215 215 215 214 214
## [936] 214 214 214 214 214 214 214 213 213 213 213 213 213 213 213 213 212
## [953] 212 212 212 212 212 212 212 212 211 211 211 211 211 211 211 211 210
## [970] 210 210 210 210 210 210 209 209 209 209 209 209 208 208 208 208 208
## [987] 208 207 207 207 207 207 207 207 207 207 206 206 206 206 206 206 206
## [1004] 206 206 205 205 205 205 205 205 205 205 205 204 204 204 204 204 204
## [1021] 204 204 204 203 203 203 203 203 203 203 203 203 202 202 202 202 202
## [1038] 202 202 201 201 201 201 201 201 201 201 201 201 201 200 200 200 200
## [1055] 200 200 200 200 200 199 199 199 199 199 199 199 199 199 198 198 198
## [1072] 198 198 198 197 197 197 172 171 171 170 170 169 169 169 169 168 168
## [1089] 168 168 168 168 168 168 168 167 167 167 167 167 167 167 167 167 166
## [1106] 166 166 166 166 166 166 166 166 165 165 165 165 165 165 165 165 165
## [1123] 164 164 164 164 164 164 164 164 164 163 163 163 163 163 163 163 163
## [1140] 163 162 162 162 162 162 162 162 162 162 161 161 161 161 161 161 161
## [1157] 161 161 160 160 160 160 160 160 160 160 160 159 159 159 159 159 159
## [1174] 159 159 159 158 158 158 158 158 158 157 157 157 157 157 156 156 156
## [1191] 156 156 156 156 155 155 155 155 155 155 155 155 155 154 154 154 154
## [1208] 154 154 154 154 154 153 153 153 153 153 153 153 153 153 152 152 152
## [1225] 152 152 152 152 152 152 151 151 151 151 151 151 151 151 151 150 150
## [1242] 150 150 150 150 150 150 149 149 149 149 149 149 149 149 149 149 148
## [1259] 148 148 148 148 148 148 148 148 147 147 147 147 147 147 147 147 146
## [1276] 146 146 146 146 146 146 146 145 145 120 119 119 118 118 117 117 117
## [1293] 117 116 116 116 116 116 116 116 116 115 115 115 115 115 115 115 115
## [1310] 114 114 114 114 114 114 114 114 113 113 113 113 113 113 113 113 112
## [1327] 112 112 112 112 112 112 112 111 111 111 111 111 111 111 110 110 110
## [1344] 110 110 110 110 110 109 109 109 109 109 109 109 109 108 108 108 108
## [1361] 108 108 108 107 107 107 107 107 107 107 107 106 106 106 106 106 106
## [1378] 106 106 105 105 105 105 105 105 105 105 104 104 104 104 104 104 104
## [1395] 104 103 103 103 103 103 103 103 103 102 102 102 102 102 102 102 102
## [1412] 101 101 101 101 101 101 101 101 100 100 100 100 100 100 100 100 99
## [1429] 99 99 99 99 99 99 99 98 98 98 98 98 98 97 97 97 97
## [1446] 97 97 97 96 96 96 96 96 96 96 96 95 95 95 95 95 95
## [1463] 95 95 94 94 94 94 94 94 94 94 93 93 93 93 93 93 93
## [1480] 92 68 67 66 66 65 65 64 64 64 64 63 63 63 63 63 63
## [1497] 63 63 62 62 62 62 62 62 62 62 61 61 61 61 61 61 61
## [1514] 61 60 60 60 60 60 60 60 60 59 59 59 59 59 59 59 59
## [1531] 58 58 58 58 58 58 58 58 57 57 57 57 57 57 57 57 56
## [1548] 56 56 56 56 56 56 56 55 55 55 55 55 55 55 54 54 54
## [1565] 54 53 53 53 53 53 52 52 52 52 52 52 52 52 51 51 51
## [1582] 51 51 51 51 51 50 50 50 50 50 50 50 50 49 49 49 49
## [1599] 49 49 49 49 48 48 48 48 48 48 48 48 47 47 47 47 47
## [1616] 47 47 47 46 46 46 46 46 46 46 46 45 45 45 45 45 45
## [1633] 45 45 44 44 44 44 44 44 44 44 43 43 43 43 43 43 43
## [1650] 43 42 42 42 42 42 42 42 41 41 41 41 41 41 41 41 40
## [1667] 15 14 14 13 13 12 12 12 12 11 11 11 11 11 11 11 11
## [1684] 10 10 10 10 10 10 10 10 9 9 9 9 9 9 9 9 8
## [1701] 8 8 8 8 8 8 8 7 7 7 7 7 7 7 7 6 6
## [1718] 6 6 6 6 6 6 5 5 5 5 5 5 5 5 4 4 4
## [1735] 4 4 4 4 4 3 3 3 3 3 3 3 3 2 2 2 2
## [1752] 2 2 2 2 1 1 1
#create a design matrix
Teams <- sort(as.character(unique(c(as.character(afl_bookies$Home.Team)))))
#Defining the number of things
nTeams <- length(Teams)
nWeeks <- max(afl_bookies$week)
n <- nrow(afl_bookies)
#Defining the design matrix
x <- matrix(0, nrow = dim(afl_bookies)[1], ncol = length(Teams))
for (i in 1:dim(afl_bookies)[1]) {
x[i, which(as.character(afl_bookies[i,"home"]) == Teams)] <- (1)
x[i, which(as.character(afl_bookies[i,"away"]) == Teams)] <- (-1)
}
model.string <-"
model {
for (i in 1:n) {
y[i] ~ dnorm(mu[i], tauGame)
mu[i] <- alpha + inprod(theta[w[i],],x[i,])
}
for (j in 1:nTeams){
theta[1,j] ~ dnorm(0, tauSeason)
}
for (www in 2:nWeeks) {
for (j in 1:nTeams) {
theta[www,j] ~ dnorm(gammaWeek*theta[www-1,j], tauWeek)
}
}
alpha ~ dnorm(0,0.0001)
tauGame ~ dunif(0,1000) #uncertainty in outcome for each game
tauWeek ~ dunif(0,1000)
tauSeason ~ dunif(0,1000) #variance parameter for the first week of the season
gammaWeek ~ dunif(0,1.5)
}
"
model.spec<-textConnection(model.string)
library(rjags)
n.chains <- 3
n.adapt <- n.update <- n.draws <- 1000
posteriorDraws = c('alpha','theta')
thin <- 5
jags <- jags.model(model.spec,
data = list('y' = y,'x' = x, 'w' = w, 'n' = n,'nTeams' = nTeams,'nWeeks' = nWeeks),
n.chains = n.chains, n.adapt = n.adapt)
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph information:
## Observed stochastic nodes: 1077
## Unobserved stochastic nodes: 9002
## Total graph size: 54294
##
## Initializing model
update(jags, n.update)
z <- jags.samples(jags, posteriorDraws, n.draws, thin = thin)
colours <- c("#7fc97f", "#beaed4", "#fdc086")
hfas <- data.frame(round(z$alpha[,,], 3)) %>% mutate(draw = 1:n())
hfas %>% ggplot(aes(draw, X1)) +
geom_line(colour = colours[1]) +
geom_line(data = hfas, aes(draw, X2), colour = colours[2]) +
geom_line(data = hfas, aes(draw, X3), colour = colours[3]) +
xlab("Draw") +
ggtitle("Home advantage (logit scale)") +
ylab("") +
theme_bw()
There you go, pretty cool!