Suppose we estimate a model and get the following results:
\[ \hat{\beta} = \begin{bmatrix} -0.4 \\ 0.5 \end{bmatrix} \]
\[ \nabla_{\beta}^2 \ln(\mathcal{L}) = \begin{bmatrix} -6000 & 60 \\ 60 & -700 \end{bmatrix} \]
hessian <- matrix(c(-6000, 60, 60, -700), ncol = 2, byrow = TRUE)
covariance <- -1*solve(hessian)
se <- sqrt(diag(covariance))
sigma1 <- se[1]
sigma2 <- se[2]
sigma1
#> [1] 0.01291548
sigma2
#> [1] 0.03781266
beta1 <- -0.4
beta2 <- 0.5
ci1 <- c(beta1 - 2*sigma1, beta1 + 2*sigma1)
ci2 <- c(beta2 - 2*sigma2, beta2 + 2*sigma2)
ci1
#> [1] -0.425831 -0.374169
ci2
#> [1] 0.4243747 0.5756253
Suppose we estimate the following utility model describing preferences for cars:
\[ u_j = \alpha p_j + \beta_1 x_j^{mpg} + \beta_2 x_j^{elec} + \varepsilon_j \]
The estimated model produces the following results:
Parameter | Coefficient |
---|---|
\(\alpha\) | -0.7 |
\(\beta_1\) | 0.1 |
\(\beta_2\) | -0.4 |
Hessian:
\[ \begin{bmatrix} -6000 & 50 & 60 \\ 50 & -700 & 50 \\ 60 & 50 & -300 \end{bmatrix} \]
mvrnorm()
function from the MASS
library.beta <- c(-0.7, 0.1, -4.0)
hessian <- matrix(c(
-6000, 50, 60,
50, -700, 50,
60, 50, -300),
ncol = 3, byrow = TRUE)
covariance <- -1*solve(hessian)
draws <- MASS::mvrnorm(10^5, beta, covariance)
head(draws)
#> [,1] [,2] [,3]
#> [1,] -0.6883767 0.07455516 -3.981057
#> [2,] -0.6849568 0.05760901 -4.105302
#> [3,] -0.7153545 0.16959248 -3.967045
#> [4,] -0.6972684 0.13993164 -3.860456
#> [5,] -0.6963814 0.09933565 -3.977531
#> [6,] -0.6853383 0.14693863 -3.895686
# Mean and CI for par 1
mean(draws[, 1])
#> [1] -0.6999935
quantile(draws[, 1], c(0.025, 0.975))
#> 2.5% 97.5%
#> -0.7254501 -0.6746434
# Compare to computed CI
sigma <- sqrt(diag(covariance))
c(beta[1] - 2*sigma[1], beta[1] + 2*sigma[1])
#> [1] -0.725857 -0.674143
# Mean and CI for par 2 & 3
mean(draws[, 2])
#> [1] 0.1000915
quantile(draws[, 2], c(0.025, 0.975))
#> 2.5% 97.5%
#> 0.02538884 0.17540911
mean(draws[, 3])
#> [1] -3.999676
quantile(draws[, 3], c(0.025, 0.975))
#> 2.5% 97.5%
#> -4.114231 -3.885681
# Solution using the class package ci() function:
maddTools::ci(draws, ci = 0.95)
#> mean lower upper
#> 1 -0.6999935 -0.72545007 -0.6746434
#> 2 0.1000915 0.02538884 0.1754091
#> 3 -3.9996755 -4.11423052 -3.8856808
Write code to read in the following two files:
pv_cells.csv
: Data on solar photovoltaic cell production by countrymilk_production.csv
: Data on milk production by statemilk_production <- read_csv(here('data', 'milk_production.csv'))
pv_cells <- read_csv(here('data', 'pv_cells.csv'))
Now modify the format of each:
pivot_longer()
pivot_wider()
# milk_production is in long format - convert to wide
milk_wide <- milk_production %>%
pivot_wider(
names_from = state,
values_from = milk_produced)
head(milk_wide)
#> # A tibble: 6 × 51
#> year Maine `New Hampshire` Vermont Massachusetts `Rhode Island` Connecticut
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 1970 6.19e8 356000000 1.97e9 658000000 75000000 661000000
#> 2 1971 6.29e8 359000000 2.02e9 658000000 70000000 660000000
#> 3 1972 6.38e8 353000000 2.04e9 629000000 69000000 643000000
#> 4 1973 6.14e8 337000000 1.95e9 595000000 64000000 614000000
#> 5 1974 6.11e8 329000000 1.94e9 593000000 63000000 613000000
#> 6 1975 6.29e8 336000000 2.01e9 601000000 63000000 608000000
#> # … with 44 more variables: New York <dbl>, New Jersey <dbl>,
#> # Pennsylvania <dbl>, Delaware <dbl>, Maryland <dbl>, Michigan <dbl>,
#> # Wisconsin <dbl>, Minnesota <dbl>, Ohio <dbl>, Indiana <dbl>,
#> # Illinois <dbl>, Iowa <dbl>, Missouri <dbl>, North Dakota <dbl>,
#> # South Dakota <dbl>, Nebraska <dbl>, Kansas <dbl>, Virginia <dbl>,
#> # West Virginia <dbl>, North Carolina <dbl>, Kentucky <dbl>, Tennessee <dbl>,
#> # South Carolina <dbl>, Georgia <dbl>, Florida <dbl>, Alabama <dbl>, …
# pv_cells is in wide format - convert to long
pv_cells_long <- pv_cells %>%
pivot_longer(
cols = China:World,
names_to = "country",
values_to = "numCells")
# Could also do this
pv_cells_long <- pv_cells %>%
pivot_longer(
cols = -Year,
names_to = "country",
values_to = "numCells")
head(pv_cells_long)
#> # A tibble: 6 × 3
#> Year country numCells
#> <dbl> <chr> <dbl>
#> 1 1995 China NA
#> 2 1995 Taiwan NA
#> 3 1995 Japan 16.4
#> 4 1995 Malaysia NA
#> 5 1995 Germany NA
#> 6 1995 South Korea NA