'R: Panel Data: calculating mean and median of variables based on date / dummy variable
So i am analysing fund panel data. I estimated a fixed effect model with double clustered error terms along the identification (ISIN) and (Date).
Each fund has a dummy variable: 0 the periods preceding a name change and 1 for the periods after. Now, I'd like to get descriptive statistics (means and median) for each variable 6m prior name change and 6m after.
First and foremost I am struggling to find key words that i could google. But essentially i want to identify via the dummy variable the name change date and then calculate the mean and median for 6m prior and 6m after.
output for dput(data) is :
> dput(nd[1:100, ])
structure(list(Date = structure(c(1517356800, 1519776000, 1522454400,
1525046400, 1527724800, 1530316800, 1532995200, 1535673600, 1538265600,
1540944000, 1543536000, 1546214400, 1548892800, 1551312000, 1553990400,
1556582400, 1559260800, 1561852800, 1564531200, 1567209600, 1569801600,
1572480000, 1575072000, 1577750400, 1580428800, 1582934400, 1585612800,
1588204800, 1590883200, 1593475200, 1596153600, 1598832000, 1601424000,
1604102400, 1606694400, 1609372800, 1612051200, 1614470400, 1617148800,
1619740800, 1622419200, 1625011200, 1627689600, 1630368000, 1632960000,
1635638400, 1638230400, 1640908800, 1517356800, 1519776000, 1522454400,
1525046400, 1527724800, 1530316800, 1532995200, 1535673600, 1538265600,
1540944000, 1543536000, 1546214400, 1548892800, 1551312000, 1553990400,
1556582400, 1559260800, 1561852800, 1564531200, 1567209600, 1569801600,
1572480000, 1575072000, 1577750400, 1580428800, 1582934400, 1585612800,
1588204800, 1590883200, 1593475200, 1596153600, 1598832000, 1601424000,
1604102400, 1606694400, 1609372800, 1612051200, 1614470400, 1617148800,
1619740800, 1622419200, 1625011200, 1627689600, 1630368000, 1632960000,
1635638400, 1638230400, 1640908800, 1517356800, 1519776000, 1522454400,
1525046400), tzone = "UTC", class = c("POSIXct", "POSIXt")),
Dummy = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 0, 0, 0), ISIN = c("LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628",
"LU1883312628", "LU1883312628", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783",
"NL0000289783", "NL0000289783", "DE0008474008", "DE0008474008",
"DE0008474008", "DE0008474008"), Returns = c(-0.12401, -4.15496,
-1.39621, 4.46431, -2.28814, -0.58213, 3.61322, -3.56401,
0.6093, -4.73124, 0.88597, -5.55014, 5.12313, 2.65441, 1.3072,
2.99972, -5.1075, 3.51965, 0.24626, -2.21961, 4.48332, -0.03193,
2.19313, 1.81355, -2.2836, -8.3185, -14.58921, 4.47981, 4.52948,
5.51294, -2.16857, 2.56992, -2.04736, -6.17825, 14.71218,
1.24079, -1.33888, 3.5197, 8.09674, 1.43074, 3.79434, 0.47398,
1.57474, 2.48837, -3.08439, 3.68851, -2.93803, 6.43656, 2.67598,
-3.39767, -5.27997, 4.76756, 4.89914, -0.95931, 2.22484,
3.01478, 1.63997, -6.64158, 3.46497, -8.54853, 7.40113, 5.68973,
1.64367, 4.35256, -5.09351, 3.43618, 2.16774, -0.77703, 3.16832,
1.65626, 4.91897, 1.76163, 1.49508, -5.16847, -9.53639, 12.74246,
3.08746, 3.4028, 0.09515, 5.66077, -2.85661, -2.58972, 9.53565,
2.93138, 0.32556, 2.92393, 5.02059, 0.98137, 0.58733, 4.91219,
2.21603, 2.52087, -3.87762, 7.66159, -0.04559, 4.48257, 2.83511,
-6.27841, -3.98683, 4.99554), Flows = c(-0.312598458, -37.228563578,
-119.065088084, -85.601069424, -46.613436838, -20.996760878,
-12.075112555, -40.571568112, -16.210315254, -54.785115578,
-55.93565336, -25.073939479, -16.513305702, -111.112262813,
-17.260252326, -44.287088276, -84.358676293, -12.73665543,
-14.846322594, -30.353217826, -43.002634628, -31.293725624,
-32.291532262, -21.145334594, -33.460150254, -22.458849454,
-34.690817528, -34.088358344, -4.069613214, -7.841523244,
-6.883674001, -11.99060429, -19.155102931, -20.274682083,
-33.509645025, -25.764368282, -22.451403457, -39.075362392,
-9.772306537, -7.214728071, -10.462230506, -12.550102699,
-0.439609898, -16.527865041, -15.938402293, -10.916678964,
-11.041205907, -11.627537098, -13.797947969, -18.096144272,
29.879529566, -51.895196556, -3.192064966, -1.469562773,
9.739671656, -35.108549922, -19.490401121, 36.459406559,
-66.213269625, 8.105824198, -17.078089399, -59.408458411,
1.227033593, -42.501421101, -15.275983037, 19.425363714,
-23.165013159, -19.68599313, -20.478530269, -19.566890333,
-19.63229278, -59.274372862, -37.128708445, 5.129404763,
-2.650978954, -0.566245645, -14.80700799, 4.891308881, -18.16286654,
-17.570559084, -2.726629634, -14.482219321, -35.795673521,
-10.119935801, -14.37900783, -20.385053784, -4.550848701,
-17.672355509, -14.270420088, 1.440911458, -8.924636198,
-5.749771862, -12.284920947, -23.093834986, -13.553880939,
-31.572182943, -22.977082191, -8.076560195, -11.825577374,
-9.263872938), TNA = c(2474.657473412, 2327.75517961, 2171.146502197,
2175.433117247, 2082.147188171, 2042.121760963, 2031.311390907,
1918.904748403, 1914.140451001, 1765.867322561, 1724.972362171,
1600.059421422, 1605.009162592, 1539.205393073, 1540.8291693,
1538.550310809, 1370.631945404, 1404.091772234, 1351.60138448,
1290.98574898, 1309.942298579, 1280.634128059, 1278.146819041,
1281.50075434, 1189.563983023, 1062.001168646, 859.735053702,
868.096185968, 894.397805491, 933.614731653, 885.975121845,
897.018097461, 854.196359787, 781.178047528, 863.00585297,
846.859512502, 796.10866733, 784.290994645, 838.747509395,
841.511540715, 863.678978862, 854.663205271, 856.363306246,
859.460891875, 816.275861034, 836.347760358, 800.867957871,
842.657752288, 2742.709413, 2629.70296, 2518.690562, 2516.902480001,
2635.037923, 2606.124805, 2672.082125, 2715.556617, 2738.845915,
2591.318371, 2613.260789, 2396.060545001, 2554.437804, 2638.160519,
2680.990319, 2753.467368, 2533.347075001, 2637.887076, 2670.127393,
2628.138778001, 2688.643794, 2711.56785, 2823.634535001,
2811.983963001, 2835.218976, 2672.765021, 2413.332814, 2718.586512,
2727.69596, 2823.040628, 2805.482839, 2944.602701, 2855.870812,
2765.189256, 2990.804719, 3066.36598, 3059.603769, 3126.458368,
3276.612153, 3289.257788, 3291.864476, 3397.759970999, 3461.462599,
3540.518638, 3388.702548, 3622.641661, 3604.82519, 3732.115875999,
4129.617979, 3857.780349, 3687.848268001, 3858.323607), Age = c(2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 62, 62, 62, 62)), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
library(plm)
attach(nd)
library(lmtest)
library(stargazer)
library(sandwich)
library(etable)
library(pacman)
library(fixest)
library(multiwayvcov)
library(foreign)
#cleaning
#adjust units of TNA and Flows
#nd <- nd %>%
#mutate(TNA = TNA / 1000000, Flows = Flows / 1000000) #1mio and 1mio
#drop na's
#nd <- nd %>%
#drop_na()
#variable creation for model
Y <- cbind(nd$Flows)
X <- cbind(nd$Dummy, lag(nd$Returns), lag(nd$TNA), nd$Age)
# descriptive statistics
summary(Y)
summary(X)
#random effects
random2 <- plm(Y ~ X, nd, model='random', index=c('ISIN', 'Date'))
summary(random2)
#fixed effect model
fixed2 <- plm(Y ~ X, nd, model='within', index=c('ISIN', 'Date'))
# Breusch-Pagan Test
bptest(fixed2)
#Test which model to use fixed effect or random effects
#hausmann test
phtest(random2, fixed2) # we take fixed effects
##Double-clustering formula (Thompson, 2011)
vcovDC <- function(x, ...){
vcovHC(x, cluster="ISIN", ...) + vcovHC(x, cluster="Date", ...) -
vcovHC(x, method="white1", ...)
}
testamk <- plm(Y ~ X, nd, model='within', index=c('ISIN', 'Date'))
summary(testamk)
coeftest(testamk, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))
Many thanks in advance! Joe
Solution 1:[1]
I solved the issue via lagging the whole data set.
library(readxl)
New_R_done <- read_excel("New_R done.xlsx")
View(New_R_done)
library(tidyverse)
library(plm)
descr <- pdata.frame(New_R_done, index=c("ISIN", "Date"))
str(descr)
length(levels(as.factor(descr$Dummy)))
length(levels(as.factor(descr$ISIN)))
descr <- descr %>% na.omit()
View(descr)
#Return
descr$Returns <- ifelse(descr$Returns=="n.a", 0, descr$Returns)
descr$Returns <- as.numeric(descr$Returns)
#Flows
descr$Flows <- ifelse(descr$Flows=="n.a", 0, descr$Flows)
descr$Flows <- as.numeric(descr$Flows)
#TNA
descr$TNA <- ifelse(descr$TNA=="n.a", 0, descr$TNA)
descr$TNA <- as.numeric(descr$TNA)
name_change <- ifelse(descr$Dummy==1 & lag(descr$Dummy, 1)==0, 1, 0)
# Median Return
lags <- plm::lag(descr$Returns, 1:6)
lag_median <- apply(lags, 1, median)
leads <- plm::lead(descr$Returns, 1:6)
lead_median <- apply(leads, 1, median)
d <- data.frame(name_change, lag_median, lead_median)
d[d$name_change==1,]
# Median Flows
lags_flow <- plm::lag(descr$Flows, 1:6)
lag_median_flow <- apply(lags_flow, 1, median)
leads_flow <- plm::lead(descr$Flows, 1:6)
lead_median_flow <- apply(leads_flow, 1, median)
d_flow <- data.frame(name_change, lag_median_flow, lead_median_flow)
d_flow[d_flow$name_change==1,]
#Median TNA
lags_TNA <- plm::lag(descr$TNA, 1:6)
lag_median_TNA <- apply(lags_TNA, 1, median)
leads_TNA <- plm::lead(descr$TNA, 1:6)
lead_median_TNA <- apply(leads_TNA, 1, median)
d_TNA <- data.frame(name_change, lag_median_TNA, lead_median_TNA)
d_TNA[d_TNA$name_change==1,]
# Mean Return
lag_mean <- apply(lags, 1, mean)
lead_mean <- apply(leads, 1, mean)
d_m <- data.frame(name_change, lag_mean, lead_mean)
d_m[d_m$name_change==1,]
# Mean Flows
lag_mean_flow <- apply(lags_flow, 1, mean)
lead_mean_flow <- apply(leads_flow, 1, mean)
d_flow_mean <- data.frame(name_change, lag_mean_flow, lead_mean_flow)
d_flow_mean[d_flow_mean$name_change==1,]
# Mean TNA
lag_mean_tna <- apply(lags_TNA, 1, mean)
lead_mean_tna <- apply(leads_TNA, 1, mean)
d_tna_mean <- data.frame(name_change, lag_mean_tna, lead_mean_tna)
d_tna_mean[d_tna_mean$name_change==1,]
# Return Median +6 -6
makeRETURN <- d[d$name_change==1,]
mean(makeRETURN$lag_median)
mean(makeRETURN$lead_median, na.rm = TRUE)
# Flow Median +6 -6
makeFlOW <- d_flow[d_flow$name_change==1,]
mean(makeFlOW$lag_median_flow)
mean(makeFlOW$lead_median_flow, na.rm = TRUE)
# TNA Median +6 -6
makeTNA <- d_TNA[d_TNA$name_change==1,]
mean(makeTNA$lag_median_TNA)
mean(makeTNA$lead_median_TNA, na.rm = TRUE)
# Return Mean +6 -6
makeRETURN_mean <- d_m[d_m$name_change==1,]
mean(makeRETURN_mean$lag_mean)
mean(makeRETURN_mean$lead_mean, na.rm = TRUE)
# Flow Mean +6 -6
makeFLOW_mean <- d_flow_mean[d_flow$name_change==1,]
mean(makeFLOW_mean$lag_mean_flow)
mean(makeFLOW_mean$lead_mean_flow, na.rm = TRUE)
# TNA Mean +6 -6
makeTNA_mean <- d_tna_mean[d_TNA$name_change==1,]
mean(makeTNA_mean$lag_mean_tna)
mean(makeTNA_mean$lead_mean_tna, na.rm = TRUE)
library(stargazer)
#how to display a matrix with Return -- Mean - Median for +6 -6 ?
stargazer(makeTNA_mean, makeFLOW_mean, makeRETURN_mean, makeTNA, makeFlOW, makeRETURN, type = "text")
Definitely not efficient as i create hundert of DFs. but it yields the wanted result. Maybe it helps someone.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | krahllu |