// ***************************************************

// * This file further cleans the data and creates new variables for the market-segment-level regressions.
// * It merges the data with the distance-to-border file.
// * The result is saved in the data folder as Community_18M_distance.dta


//***************************************************
// * This file uses the previously cleaned market-level data and the distance-to-border file.
//**************************************************/

cap log close  
log using "$path_results/001_cleaning_add_data.log",replace

use "$path_data/Community_18M.dta", clear

// save file with one random observation
preserve 
	keep if HouseType=="Detached"
	keep if _n==1000
	save "$path_data/Community_18M_sample.dta", replace
restore

merge m:1 Area Municipality Community using "$path_data/community_distance.dta"
keep if _m==3

summarize HouseType

* yearmonth
rename YR YOS
rename Mn MOS
gen yearmonth=ym(YOS,MOS)
format yearmonth %tm

* houses sample
keep if Area=="Toronto" | (Municipality=="Mississauga" | Municipality=="Brampton" | Municipality=="Vaughan" | Municipality=="Richmond Hill" | Municipality=="Markham" | Municipality=="Pickering")

*rename house types
encode HouseType,gen(PropertyType)

** toronto
gen toronto=1*(Area=="Toronto")
gen post=1*(yearmonth>ym(2008,02))
gen LTT=toronto*post
   
** GENERATING +-6 TO DUMMIES
gen TO_3m=1*(toronto==1 & yearmonth==ym(2007,10))
replace TO_3m=2*(toronto==1 & yearmonth==ym(2007,11)) if TO_3m==0
replace TO_3m=3*(toronto==1 & yearmonth==ym(2007,12)) if TO_3m==0
replace TO_3m=4*(toronto==1 & yearmonth==ym(2008,01)) if TO_3m==0
replace TO_3m=5*(toronto==1 & yearmonth==ym(2008,02)) if TO_3m==0
replace TO_3m=6*(toronto==1 & yearmonth==ym(2008,03)) if TO_3m==0

** SAMPLES **
gen sample3=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2012,02))
gen sample4=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2010,02))
gen sample5=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2014,02))
gen sample6=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2017,12)) //ym(2018,02) // * for yearly data
gen sample7=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2008,08))
gen sample8=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2016,02))
gen sample9=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2015,02))
gen sample10=1*(yearmonth>=ym(2006,01) & yearmonth<=ym(2017,02))


gen ind1 = 1*(yearmonth>ym(2008,01) & yearmonth<=ym(2010,02))
gen ind2 = 1*(yearmonth>ym(2010,02) & yearmonth<=ym(2012,02))
gen ind3 = 1*(yearmonth>ym(2012,02) & yearmonth<=ym(2014,02))
gen ind4 = 1*(yearmonth>ym(2014,02) & yearmonth<=ym(2016,02))
gen ind5 = 1*(yearmonth>ym(2016,02) & yearmonth<=ym(2018,02))

** FILLING GAPS
egen id=group(Area Municipality Community HouseType)
xtset id yearmonth
gen origdata=1
*tsfill, full
replace origdata=0 if origdata==.

*replace year and month of tsfill data
drop YOS MOS
gen date=dofm(yearmonth) 
gen YOS=year(date)
gen MOS=month(date)

*com
bys id: egen aux=mode(Community)
replace Community=aux if Community==""
drop aux

gen x_25=1*(distance<-2500 & distance>-5000)
gen LTT_distance=distance*LTT

rename YOS year
rename MOS month
encode Community, gen(com)

replace BTR_SaleCount = 0 if BTR_SaleCount==.
replace BTO_SaleCount = 0 if BTO_SaleCount==.
replace BTS_SaleCount = 0 if BTS_SaleCount==.
gen BTR_SaleCount_1 = BTR_SaleCount+1


* Generate LTT*Dummy_distance
egen d_max=max(abs(distance))

gen d_25=1*(abs(distance)/d_max <= 0.25)
gen d_50=1*(abs(distance)/d_max>0.25 & abs(distance)/d_max <=0.5)
gen d_75=1*(abs(distance)/d_max>0.5 & abs(distance)/d_max <=0.75)


gen LTT_d25=LTT*d_25
gen LTT_d50=LTT*d_50
gen LTT_d75=LTT*d_75

gen crisis= 1*(yearmonth>=ym(2008,09) & yearmonth<=ym(2008,11))

local loglist = "Total_SaleCount BTO_SaleCount BTR_SaleCount TotalLease_LeaseCount"

foreach var of varlist `loglist' {
	gen ln_`var' = log(`var')
}

// * Quarter periods

local minym = 552
local maxym = 732
// * Create period vaiables
forvalues i = `minym'(3)`maxym'{

	local i_1 = `i' +1
	local i_2 = `i' +2
	if `i_2'>`maxym'{
		local i_2 = `maxym'
	}
	if `i_1'>`maxym'{
		local i_1 = `maxym'
	}
	g three_month_per_`i' = 0

	replace three_month_per_`i' = 1 if yearmonth==`i' |yearmonth ==`i_1' |yearmonth ==`i_2'
}

egen quarter = group(three_month_per_*), missing
// invert order
replace quarter = 56-quarter

drop three_month_per_*

// ***************************************************
// * Saving the data
// ***************************************************

save "$path_data/Community_18M_distance.dta", replace

// ***************************************************

log close