function hazdata = empiricalhazard(maxdur,hazfe)
%EMPIRICALHAZARD Empirical estimate of the hazard function.
%   HAZDATA = EMPIRICALHAZARD(MAXDUR,HAZFE) reads the spreadsheet
%   'DurationDistribution.xlsx' (AHS data) from the current path and
%   returns hazard rates by duration.
%
%   Spreadsheet layout, as read by this function:
%     row 1, columns 2..end      average move-in date of each cohort
%     column 1, rows 2..end      survey years
%     rows 2..end, cols 2..end   surviving number of each cohort at each survey
%
%   Inputs:
%     MAXDUR  only durations strictly below MAXDUR are returned
%     HAZFE   if true, survey-interval and cohort means are subtracted from
%             the hazard grid and twice the overall mean is added back
%
%   Output:
%     HAZDATA 4-by-1 cell array {dur; haz; n; empweights}
%       dur         distinct durations, ascending
%       haz         mean hazard rate over the observations at each duration
%       n           number of grid cells that share each duration
%       empweights  mean cohort size at each duration, divided by the
%                   value at the shortest duration
%     All four entries are empty when no duration lies below MAXDUR.

% Load AHS data
data = xlsread('DurationDistribution.xlsx');

% Number of survey years and cohorts (first row and column are headers)
[nsyr, ncht] = size(data);
nsyr = nsyr - 1;
ncht = ncht - 1;

% Survey years
survyears = data(2:nsyr+1,1);

% Current years: midpoint between each pair of consecutive surveys
years = (survyears(1:nsyr-1) + survyears(2:nsyr))/2;

% Average cohort move-in dates
movein = data(1,2:ncht+1);

% Surviving numbers of each cohort
cohorts = data(2:nsyr+1,2:ncht+1);

% Compute hazard rates from the survival ratio between consecutive surveys.
% NOTE(review): the square root presumably converts a two-year survival
% ratio into an annual one -- confirm that surveys are two years apart.
hazgrid = 1 - sqrt(cohorts(2:nsyr,:)./cohorts(1:nsyr-1,:));

% Year and cohort fixed effects
if hazfe
    hazgrid0 = hazgrid;
    hazgrid0(isnan(hazgrid0)) = 0;
    % NOTE(review): the counts below use (hazgrid0 > 0), so zero or negative
    % hazard rates enter the sums but not the counts -- confirm this is
    % intended rather than counting ~isnan(hazgrid).
    chtmean = (sum(hazgrid0)./sum(hazgrid0 > 0))';
    syrmean = (sum(hazgrid0')./sum(hazgrid0' > 0))';
    hazmean = sum(sum(hazgrid0)')./sum(sum(hazgrid0 > 0)');
    hazgrid = hazgrid - syrmean*ones(1,ncht) - ones(nsyr-1,1)*chtmean' + 2*hazmean*ones(nsyr-1,ncht);
end

% Numbers of observations: cohort size at the start of each survey interval
obsgrid = cohorts(1:nsyr-1,:);

% Compute durations: interval midpoint minus cohort move-in date
durgrid = years*ones(1,ncht) - ones(nsyr-1,1)*movein;

% Extract hazard function: keep only cells with a defined hazard rate
valid = ~isnan(hazgrid);
m = sum(sum(valid));
hazrate = reshape(hazgrid(valid),m,1);
duration = reshape(durgrid(valid),m,1);
numobs = reshape(obsgrid(valid),m,1);

% Sort by duration
[duration, srtidx] = sort(duration);
hazrate = hazrate(srtidx);
numobs = numobs(srtidx);

% Consolidate multiple observations at the same duration.
% uidx holds the index of the first entry of each run of equal durations;
% differencing the cumulative sums at the run ends gives per-run totals,
% which are divided by the run length n to obtain per-run means.
uobs = ([1; diff(duration)] > 0);
uidx = (1:m)';
uidx = uidx(uobs);
dur = duration(uobs);
haz = cumsum(hazrate);
nobs = cumsum(numobs);
n = diff([uidx;m+1]);
haz = diff([0;haz([uidx(2:end)-1;m])])./n;
nobs = diff([0;nobs([uidx(2:end)-1;m])])./n;

% Truncate hazard function at maximum duration.
% The mask is computed once, before dur is overwritten, so that every
% vector is filtered with the same full-length selection.
keep = (dur < maxdur);
dur = dur(keep);
n = n(keep);
haz = haz(keep);
nobs = nobs(keep);

% Weights based on number of observations used to calculate hazard rate,
% normalised by the value at the shortest duration. Nothing to normalise
% when the truncation removed every duration.
if isempty(nobs)
    empweights = nobs;
else
    empweights = (1/nobs(1))*nobs;
end

% Package data
hazdata = {dur;haz;n;empweights};
