'R: Panel Data: calculating mean and median of variables based on date / dummy variable

So i am analysing fund panel data. I estimated a fixed effect model with double clustered error terms along the identification (ISIN) and (Date).

Each fund has a dummy variable: 0 the periods preceding a name change and 1 for the periods after. Now, I'd like to get descriptive statistics (means and median) for each variable 6m prior name change and 6m after.

First and foremost I am struggling to find key words that i could google. But essentially i want to identify via the dummy variable the name change date and then calculate the mean and median for 6m prior and 6m after.

output for dput(data) is :

> dput(nd[1:100, ])
structure(list(Date = structure(c(1517356800, 1519776000, 1522454400, 
1525046400, 1527724800, 1530316800, 1532995200, 1535673600, 1538265600, 
1540944000, 1543536000, 1546214400, 1548892800, 1551312000, 1553990400, 
1556582400, 1559260800, 1561852800, 1564531200, 1567209600, 1569801600, 
1572480000, 1575072000, 1577750400, 1580428800, 1582934400, 1585612800, 
1588204800, 1590883200, 1593475200, 1596153600, 1598832000, 1601424000, 
1604102400, 1606694400, 1609372800, 1612051200, 1614470400, 1617148800, 
1619740800, 1622419200, 1625011200, 1627689600, 1630368000, 1632960000, 
1635638400, 1638230400, 1640908800, 1517356800, 1519776000, 1522454400, 
1525046400, 1527724800, 1530316800, 1532995200, 1535673600, 1538265600, 
1540944000, 1543536000, 1546214400, 1548892800, 1551312000, 1553990400, 
1556582400, 1559260800, 1561852800, 1564531200, 1567209600, 1569801600, 
1572480000, 1575072000, 1577750400, 1580428800, 1582934400, 1585612800, 
1588204800, 1590883200, 1593475200, 1596153600, 1598832000, 1601424000, 
1604102400, 1606694400, 1609372800, 1612051200, 1614470400, 1617148800, 
1619740800, 1622419200, 1625011200, 1627689600, 1630368000, 1632960000, 
1635638400, 1638230400, 1640908800, 1517356800, 1519776000, 1522454400, 
1525046400), tzone = "UTC", class = c("POSIXct", "POSIXt")), 
    Dummy = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 0, 0, 0, 0), ISIN = c("LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "LU1883312628", "LU1883312628", 
    "LU1883312628", "LU1883312628", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "NL0000289783", "NL0000289783", 
    "NL0000289783", "NL0000289783", "DE0008474008", "DE0008474008", 
    "DE0008474008", "DE0008474008"), Returns = c(-0.12401, -4.15496, 
    -1.39621, 4.46431, -2.28814, -0.58213, 3.61322, -3.56401, 
    0.6093, -4.73124, 0.88597, -5.55014, 5.12313, 2.65441, 1.3072, 
    2.99972, -5.1075, 3.51965, 0.24626, -2.21961, 4.48332, -0.03193, 
    2.19313, 1.81355, -2.2836, -8.3185, -14.58921, 4.47981, 4.52948, 
    5.51294, -2.16857, 2.56992, -2.04736, -6.17825, 14.71218, 
    1.24079, -1.33888, 3.5197, 8.09674, 1.43074, 3.79434, 0.47398, 
    1.57474, 2.48837, -3.08439, 3.68851, -2.93803, 6.43656, 2.67598, 
    -3.39767, -5.27997, 4.76756, 4.89914, -0.95931, 2.22484, 
    3.01478, 1.63997, -6.64158, 3.46497, -8.54853, 7.40113, 5.68973, 
    1.64367, 4.35256, -5.09351, 3.43618, 2.16774, -0.77703, 3.16832, 
    1.65626, 4.91897, 1.76163, 1.49508, -5.16847, -9.53639, 12.74246, 
    3.08746, 3.4028, 0.09515, 5.66077, -2.85661, -2.58972, 9.53565, 
    2.93138, 0.32556, 2.92393, 5.02059, 0.98137, 0.58733, 4.91219, 
    2.21603, 2.52087, -3.87762, 7.66159, -0.04559, 4.48257, 2.83511, 
    -6.27841, -3.98683, 4.99554), Flows = c(-0.312598458, -37.228563578, 
    -119.065088084, -85.601069424, -46.613436838, -20.996760878, 
    -12.075112555, -40.571568112, -16.210315254, -54.785115578, 
    -55.93565336, -25.073939479, -16.513305702, -111.112262813, 
    -17.260252326, -44.287088276, -84.358676293, -12.73665543, 
    -14.846322594, -30.353217826, -43.002634628, -31.293725624, 
    -32.291532262, -21.145334594, -33.460150254, -22.458849454, 
    -34.690817528, -34.088358344, -4.069613214, -7.841523244, 
    -6.883674001, -11.99060429, -19.155102931, -20.274682083, 
    -33.509645025, -25.764368282, -22.451403457, -39.075362392, 
    -9.772306537, -7.214728071, -10.462230506, -12.550102699, 
    -0.439609898, -16.527865041, -15.938402293, -10.916678964, 
    -11.041205907, -11.627537098, -13.797947969, -18.096144272, 
    29.879529566, -51.895196556, -3.192064966, -1.469562773, 
    9.739671656, -35.108549922, -19.490401121, 36.459406559, 
    -66.213269625, 8.105824198, -17.078089399, -59.408458411, 
    1.227033593, -42.501421101, -15.275983037, 19.425363714, 
    -23.165013159, -19.68599313, -20.478530269, -19.566890333, 
    -19.63229278, -59.274372862, -37.128708445, 5.129404763, 
    -2.650978954, -0.566245645, -14.80700799, 4.891308881, -18.16286654, 
    -17.570559084, -2.726629634, -14.482219321, -35.795673521, 
    -10.119935801, -14.37900783, -20.385053784, -4.550848701, 
    -17.672355509, -14.270420088, 1.440911458, -8.924636198, 
    -5.749771862, -12.284920947, -23.093834986, -13.553880939, 
    -31.572182943, -22.977082191, -8.076560195, -11.825577374, 
    -9.263872938), TNA = c(2474.657473412, 2327.75517961, 2171.146502197, 
    2175.433117247, 2082.147188171, 2042.121760963, 2031.311390907, 
    1918.904748403, 1914.140451001, 1765.867322561, 1724.972362171, 
    1600.059421422, 1605.009162592, 1539.205393073, 1540.8291693, 
    1538.550310809, 1370.631945404, 1404.091772234, 1351.60138448, 
    1290.98574898, 1309.942298579, 1280.634128059, 1278.146819041, 
    1281.50075434, 1189.563983023, 1062.001168646, 859.735053702, 
    868.096185968, 894.397805491, 933.614731653, 885.975121845, 
    897.018097461, 854.196359787, 781.178047528, 863.00585297, 
    846.859512502, 796.10866733, 784.290994645, 838.747509395, 
    841.511540715, 863.678978862, 854.663205271, 856.363306246, 
    859.460891875, 816.275861034, 836.347760358, 800.867957871, 
    842.657752288, 2742.709413, 2629.70296, 2518.690562, 2516.902480001, 
    2635.037923, 2606.124805, 2672.082125, 2715.556617, 2738.845915, 
    2591.318371, 2613.260789, 2396.060545001, 2554.437804, 2638.160519, 
    2680.990319, 2753.467368, 2533.347075001, 2637.887076, 2670.127393, 
    2628.138778001, 2688.643794, 2711.56785, 2823.634535001, 
    2811.983963001, 2835.218976, 2672.765021, 2413.332814, 2718.586512, 
    2727.69596, 2823.040628, 2805.482839, 2944.602701, 2855.870812, 
    2765.189256, 2990.804719, 3066.36598, 3059.603769, 3126.458368, 
    3276.612153, 3289.257788, 3291.864476, 3397.759970999, 3461.462599, 
    3540.518638, 3388.702548, 3622.641661, 3604.82519, 3732.115875999, 
    4129.617979, 3857.780349, 3687.848268001, 3858.323607), Age = c(2, 
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 
    3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 
    5, 5, 5, 5, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
    9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 
    12, 12, 12, 12, 12, 12, 12, 12, 62, 62, 62, 62)), row.names = c(NA, 
-100L), class = c("tbl_df", "tbl", "data.frame"))

library(plm)
attach(nd)
library(lmtest)
library(stargazer)
library(sandwich)
library(etable)
library(pacman)
library(fixest)
library(multiwayvcov)
library(foreign)


#cleaning
#adjust units of TNA and Flows 
#nd <- nd %>% 
  #mutate(TNA = TNA / 1000000, Flows = Flows / 1000000) #1mio and 1mio 
#drop na's
#nd <- nd %>%
  #drop_na()

#variable creation for model
Y <- cbind(nd$Flows)
X <- cbind(nd$Dummy, lag(nd$Returns), lag(nd$TNA), nd$Age) 

# descriptive statistics 
summary(Y)
summary(X)


#random effects 
        random2 <- plm(Y ~ X, nd, model='random', index=c('ISIN', 'Date'))
        summary(random2)

#fixed effect model 
fixed2 <- plm(Y ~ X, nd, model='within', index=c('ISIN', 'Date'))

# Breusch-Pagan Test 
bptest(fixed2)

#Test which model to use fixed effect or random effects
#hausmann test 
phtest(random2, fixed2) # we take fixed effects 



##Double-clustering formula (Thompson, 2011)
vcovDC <- function(x, ...){
  vcovHC(x, cluster="ISIN", ...) + vcovHC(x, cluster="Date", ...) - 
    vcovHC(x, method="white1", ...)
}

testamk <- plm(Y ~ X, nd, model='within', index=c('ISIN', 'Date'))
summary(testamk)

coeftest(testamk, vcov=function(x) vcovHC(x, cluster="group", type="HC1"))

Many thanks in advance! Joe



Solution 1:[1]

I solved the issue via lagging the whole data set.

library(readxl)
New_R_done <- read_excel("New_R done.xlsx")
View(New_R_done)

library(tidyverse)
library(plm)
descr <- pdata.frame(New_R_done, index=c("ISIN", "Date"))
str(descr)
length(levels(as.factor(descr$Dummy)))
length(levels(as.factor(descr$ISIN)))
descr <- descr %>% na.omit()
View(descr)
#Return 
descr$Returns <- ifelse(descr$Returns=="n.a", 0, descr$Returns)
descr$Returns <- as.numeric(descr$Returns)

#Flows
descr$Flows <- ifelse(descr$Flows=="n.a", 0, descr$Flows)
descr$Flows <- as.numeric(descr$Flows)

#TNA
descr$TNA <- ifelse(descr$TNA=="n.a", 0, descr$TNA)
descr$TNA <- as.numeric(descr$TNA)

name_change <- ifelse(descr$Dummy==1 & lag(descr$Dummy, 1)==0, 1, 0)

# Median Return 
lags <- plm::lag(descr$Returns, 1:6)
lag_median <- apply(lags, 1, median)
leads <- plm::lead(descr$Returns, 1:6)
lead_median <- apply(leads, 1, median)
d <- data.frame(name_change, lag_median, lead_median)
d[d$name_change==1,]

# Median Flows
lags_flow <- plm::lag(descr$Flows, 1:6)
lag_median_flow <- apply(lags_flow, 1, median)
leads_flow <- plm::lead(descr$Flows, 1:6)
lead_median_flow <- apply(leads_flow, 1, median)
d_flow <- data.frame(name_change, lag_median_flow, lead_median_flow)
d_flow[d_flow$name_change==1,]

#Median TNA
lags_TNA <- plm::lag(descr$TNA, 1:6)
lag_median_TNA <- apply(lags_TNA, 1, median)
leads_TNA <- plm::lead(descr$TNA, 1:6)
lead_median_TNA <- apply(leads_TNA, 1, median)
d_TNA <- data.frame(name_change, lag_median_TNA, lead_median_TNA)
d_TNA[d_TNA$name_change==1,]


# Mean Return 
lag_mean <- apply(lags, 1, mean)
lead_mean <- apply(leads, 1, mean)
d_m <- data.frame(name_change, lag_mean, lead_mean)
d_m[d_m$name_change==1,]

# Mean Flows 
lag_mean_flow <- apply(lags_flow, 1, mean)
lead_mean_flow <- apply(leads_flow, 1, mean)
d_flow_mean <- data.frame(name_change, lag_mean_flow, lead_mean_flow)
d_flow_mean[d_flow_mean$name_change==1,]

# Mean TNA 
lag_mean_tna <- apply(lags_TNA, 1, mean)
lead_mean_tna <- apply(leads_TNA, 1, mean)
d_tna_mean <- data.frame(name_change, lag_mean_tna, lead_mean_tna)
d_tna_mean[d_tna_mean$name_change==1,]

# Return Median +6 -6 
makeRETURN <- d[d$name_change==1,]
mean(makeRETURN$lag_median) 
mean(makeRETURN$lead_median, na.rm = TRUE)

# Flow Median +6 -6 
makeFlOW <- d_flow[d_flow$name_change==1,]
mean(makeFlOW$lag_median_flow) 
mean(makeFlOW$lead_median_flow, na.rm = TRUE)

# TNA Median +6 -6 
makeTNA <- d_TNA[d_TNA$name_change==1,]
mean(makeTNA$lag_median_TNA) 
mean(makeTNA$lead_median_TNA, na.rm = TRUE)

# Return Mean +6 -6 
makeRETURN_mean <- d_m[d_m$name_change==1,]
mean(makeRETURN_mean$lag_mean) 
mean(makeRETURN_mean$lead_mean, na.rm = TRUE)

# Flow Mean +6 -6 
makeFLOW_mean <- d_flow_mean[d_flow$name_change==1,]
mean(makeFLOW_mean$lag_mean_flow)            
mean(makeFLOW_mean$lead_mean_flow, na.rm = TRUE)

# TNA Mean +6 -6 
makeTNA_mean <- d_tna_mean[d_TNA$name_change==1,]
mean(makeTNA_mean$lag_mean_tna) 
mean(makeTNA_mean$lead_mean_tna, na.rm = TRUE)


library(stargazer)
#how to display a matrix with Return -- Mean - Median for +6 -6 ? 
stargazer(makeTNA_mean, makeFLOW_mean, makeRETURN_mean, makeTNA, makeFlOW, makeRETURN, type = "text")

Definitely not efficient as i create hundert of DFs. but it yields the wanted result. Maybe it helps someone.

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 krahllu