********************************************************************************
* Replication package for: "The Four R-stars: From Interest Rates to Inflation and Back"
*     Anna Pilipentseva, Nicholas Tokay, and Ricardo Reis 
*     2025
*
*     Creating Stata Datasets
********************************************************************************

clear all

*---------------------------------------------------------------*
* Folder layout (adjust basePath before running on another machine)
*   basePath      : root folder of the replication package
*   rawDataPath   : raw input files, relative to basePath
*   interDataPath : intermediate .dta files written by this script,
*                   relative to basePath
*---------------------------------------------------------------*
*local basePath    "/Users/pilipentseva/Dropbox/Ricardo/RA-MarinaSeyed/Chile-rstar/Replication_Package"
local basePath "/Users/r.a.reis/Dropbox/05Shared_folders/RA-CFMpredoc/Chile-rstar/Replication_Package"

* Work from the package root so that the relative paths below resolve
cd "`basePath'"

local rawDataPath   "./Data/Raw Data"
local interDataPath "./Data/Intermediate Data"

********************************************************************************
* Historical UK Dataset
********************************************************************************

* Load the raw UK historical series; the first row of Sheet1 supplies the variable names
import excel using "`rawDataPath'/uk_historical.xlsx", sheet("Sheet1") firstrow clear

* Net operating surplus: total gross operating surplus minus depreciation
generate NOS = gos_total - depreciation

* --- Variable labels ---
* Time index, prices and rates
label variable year                   "Calendar Year"
label variable cpi                    "Consumer Price Index"
label variable gov_bond_rate          "Government Bond Rate"

* Capital stock
label variable capital_stock_total    "Capital Stock (Total)"
label variable capital_stock_dwel     "Capital Stock (Dwellings)"
label variable capital_stock_non_dwel "Capital Stock (Non-Dwellings)"

* Operating surplus and its components
label variable gos_total              "Gross Operating Surplus: Total"
label variable gos_private_comp       "Gross Operating Surplus: Private Companies"
label variable gos_pub_corp           "Gross Operating Surplus: Public Corporations"
label variable gos_gen_gov            "Gross Operating Surplus: General Government"
label variable gos_households         "Gross Operating Surplus: Households"
label variable depreciation           "Depreciation"
label variable NOS                    "Net Operating Surplus"

* Income
label variable nndi                   "Net National Disposable Income"

* --- Keep only the variables used in the last draft of the paper
keep year cpi gov_bond_rate capital_stock_non_dwel NOS

* --- Write the UK historical dataset to the intermediate folder
save "`interDataPath'/uk_historical.dta", replace

********************************************************************************
* US Dataset (Historical and Current)
********************************************************************************

* --- Step 1: Create US dataset from ampf_main ---
* ampf_main comes from Reis(2022); only the rows with alpha2 equal to "US" are loaded
use if alpha2 == "US" using "`rawDataPath'/ampf_main.dta", clear

* Remove the many variables that are never used in this project
drop m* MWT* GDP* D* I*      ///
     K_nr* K_r* GVA*         ///
     EZ EU BRICS AE EM LIDC  ///
     govbond_*

* First version of the US working file; later steps overwrite it
save "`interDataPath'/ampf_main_us.dta", replace

* --- Step 2: Merge with historical US NOS data ---
import excel using "`rawDataPath'/us_nos_l_share_hist.xlsx", sheet("Sheet1") firstrow clear

* Convert year to numeric in case Excel delivered it as text
destring year, replace

* --- Variable labels ---
label variable year          "Calendar Year"
label variable alpha2        "Alpha-2 Country Code"
label variable nos_hist      "Historic Net Operating Surplus"
label variable labor_share   "Labor Share"
label variable capital_share "Capital Share"

* Combine with the Step 1 file on (year, alpha2); the match indicator is not kept
merge 1:1 year alpha2 using "`interDataPath'/ampf_main_us.dta", nogenerate

* Overwrite the US working file with the merged result
save "`interDataPath'/ampf_main_us.dta", replace

* --- Step 3: Add annual interest rate values ---
import excel using "`rawDataPath'/rate_values_annual_us.xlsx", sheet("Sheet1") firstrow clear

* Discard rows whose year is missing
keep if year != .

* Combine with the US working file on (year, alpha2); the match indicator is not kept
merge 1:1 year alpha2 using "`interDataPath'/ampf_main_us.dta", nogenerate

* The mich* variables are not carried into the annual file
drop mich*

* The term-premium column arrives named t; give it a longer name
rename t tvar

* --- Variable labels ---

* Identifiers
label var year             "Calendar Year"
label var alpha2           "Country/Region Code"

* Interest rates and yields
label var effr             "Effective Federal Funds Rate"
label var tb_3m            "3-Month Treasury Bill Rate"
label var dgs1             "1-Year Treasury Constant Maturity Rate"
label var dgs10            "10-Year Treasury Constant Maturity Rate"
label var dfii10           "Market Yield on U.S. Treasury Securities at 10-Year Constant Maturity, Quoted on an Investment Basis, Inflation-Indexed"
label var int_rate_r_1y    "1-Year Real Interest Rate"
label var tvar             "Term Premium on a 10 Year Zero Coupon Bond "
label var aaa              "AAA Corporate Bond Yield"
label var baa              "BAA Corporate Bond Yield"

* Prices and inflation expectations
label var cpi              "Consumer Price Index"
label var inflation_target "Inflation Target"
label var spf1_mean        "Survey of Professional Forecasters, 1-Year Forecast (Mean)"
label var spf1_median      "Survey of Professional Forecasters, 1-Year Forecast (Median)"
label var spf10_mean       "Survey of Professional Forecasters, 10-Year Forecast (Mean)"
label var spf10_median     "Survey of Professional Forecasters, 10-Year Forecast (Median)"

* Output and investment
label var nom_gdp          "Nominal GDP"
label var nom_pot_gdp      "Nominal Potential GDP"
label var y_ypot           "Real GDP / Potential GDP Ratio"
label var gpdi             "Gross Private Domestic Investment"
label var i_y              "Investment to GDP Ratio"

* Wealth, capital and corporate accounts
label var w_i              "Net National Wealth to Net National Income Ratio, sourced from WID"
label var gamma            "Gamma, Z1 Accounts"
label var h_nr             "Housing (Non-Residential)"
label var h_r              "Housing (Residential)"
label var dep_priv         "Depreciation of Private Capital (consumption of fixed capital, private), BEA"
label var corp_profits     "Corporate Profits"
label var corp_tax_revenue "Corporate Tax Revenue"

* Overwrite the US working file with the labelled, merged data
save "`interDataPath'/ampf_main_us.dta", replace

* --- Step 4: Incorporate Net National Income and Public Debt data and generate key variables ---
import excel using "`rawDataPath'/nni_debt.xlsx", sheet("Sheet1") firstrow clear

* Discard rows whose year is missing
keep if year != .

* --- Variable labels ---
label var year     "Year"
label var nni      "Net National Income"
label var debt_gdp "Debt-to-GDP ratio"

* Combine with the US working file on year; the match indicator is not kept
merge 1:1 year using "`interDataPath'/ampf_main_us.dta", nogenerate

* Capital relative to net national income (private and total)
generate kpriv_i   = Kpriv_n_NS / nni
generate ktotal_i  = K_n_NS / nni

* Wealth-to-capital ratios minus one
* NOTE(review): the division by 100 suggests w_i is stored in percent - confirm
generate bkpriv    = w_i / 100 / kpriv_i - 1
generate bktotal   = w_i / 100 / ktotal_i - 1

* Dwelling and non-dwelling capital are copies of h_r and h_nr
generate k_dwel    = h_r
generate k_nondwel = h_nr

* Debt level, and the split that places dwellings together with debt
generate debt      = debt_gdp * nom_gdp
generate b_h       = debt + k_dwel
generate k_h       = k_nondwel
generate bh_kh     = b_h / k_h - 1

* g uses the same formula as debt; g_h adds non-dwelling capital to it
generate g_h       = debt_gdp * nom_gdp + k_h
generate g         = debt_gdp * nom_gdp

* Overwrite the US working file with the extended data
save "`interDataPath'/ampf_main_us.dta", replace

* --- Step 5: Incorporate WACC data and generate key variables ---
* Read the WACC inputs; the first row of the workbook supplies the variable names
import excel using "`rawDataPath'/wacc.xlsx", firstrow clear

* --- Variable labels ---
label variable e "Total Market Value of Equity, Market capitalization of listed domestic companies, World Bank"
label variable d "Total Market Value of Debt, Nonfinancial corporate business; debt securities and loans; liability"

* Combine with the US working file on year; the match indicator is not kept
merge 1:1 year using "`interDataPath'/ampf_main_us.dta", nogenerate

* Inputs to the WACC calculation:
*   v            = equity plus debt
*   eff_tax_rate = corporate tax revenue divided by corporate profits
generate v            = e + d
generate eff_tax_rate = corp_tax_revenue / corp_profits

* Final version of the US annual dataset, now including the WACC inputs
save "`interDataPath'/ampf_main_us.dta", replace

********************************************************************************
* US Quarterly interest rate y and expected pi Dataset
********************************************************************************

* Import and clean quarterly data from Excel.
* Reads Sheet1 of r_dataset_quarterly.xlsx (first row supplies the variable
* names), converts year and quarter to numeric, keeps observations from 2000
* onwards, labels the variables and saves r_dataset_quarterly.dta.
import excel "`rawDataPath'/r_dataset_quarterly.xlsx", sheet("Sheet1") firstrow clear
destring year quarter, replace

* Stata treats missing as larger than any number, so "year < 2000" alone
* would keep rows with no year. Such rows cannot be dated and would end up
* in the later 1:1 merge on (year, quarter), so they are dropped here, in
* line with the "drop if year == ." used for the annual imports.
drop if missing(year) | year < 2000


* --- Add variable labels ---
label variable year              "Year"
label variable quarter           "Quarter"
label variable spf1_mean         "SPF 1-year forecast (mean)"
label variable spf1_median       "SPF 1-year forecast (median)"
label variable spf10_mean        "SPF 10-year forecast (mean)"
label variable spf10_median      "SPF 10-year forecast (median)"
label variable dgs10             "10-year Treasury yield (DGS10)"
label variable dgs1              "1-year Treasury yield (DGS1)"
label variable inflation_target  "Inflation target"
label variable mich_1y_median    "Michigan 1-year inflation expectation (median)"
label variable cpi_annualized    "CPI annualized growth"
* alpha2 is the country code (it is filtered on "US" and labelled as a
* country code in the annual files), not a model parameter
label variable alpha2            "Alpha-2 Country Code"
* The annual file labels dfii10 as the inflation-indexed Treasury market
* yield; the quarterly label is aligned with that
label variable dfii10            "10-year inflation-indexed Treasury yield (DFII10)"
label variable effr              "Effective Federal Funds Rate (EFFR)"

* Save the quarterly dataset
save "`interDataPath'/r_dataset_quarterly.dta", replace


********************************************************************************
* US Quarterly NOS private
********************************************************************************

* Read initial data from BEA and turn it into one row per quarter.
* Steps: import the sheet "Table" of GOS_US.xlsx starting at cell A6, keep
* five rows identified by their text in column B, rename the data columns
* to year_YYYYQq, convert them to numeric, transpose via two reshapes so
* that each kept row becomes a variable, and build year, quarter and date.
{
import excel "`rawDataPath'/GOS_US.xlsx", sheet("Table") cellrange(A6) firstrow clear

				* Filter: keep only the rows needed, matched on the exact text in B
				* (the leading spaces in "    Private enterprises" are part of the match)
				keep if B == "Net operating surplus" | B == "Taxes on production and imports" | B == "Consumption of fixed capital" | B == "Less: Subsidies1" | B == "    Private enterprises"
				drop Line
				
* Define the starting year and quarter assigned to the first data column (C)
local year = 2006
local quarter = 1

* Loop over columns from C to BX and rename them.
* The list is hard-coded: 74 columns, so the names run from year_2006Q1
* to year_2024Q2. Columns beyond BX, if any, are not renamed.
local cols = "C D E F G H I J K L M N O P Q R S T U V W X Y Z AA AB AC AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT AU AV AW AX AY AZ BA BB BC BD BE BF BG BH BI BJ BK BL BM BN BO BP BQ BR BS BT BU BV BW BX"

foreach col of local cols {
    * Create the new variable name based on year and quarter
    local newname = "year_`year'Q`quarter'"

    * Rename the column
    rename `col' `newname'

    * Update the quarter and year: advance one quarter, rolling over to
    * the first quarter of the next year after the fourth
    local quarter = `quarter' + 1
    if `quarter' > 4 {
        local quarter = 1
        local year = `year' + 1
    }
}
				* Make sure all variables are numeric: in every string variable the
				* placeholder "---" is replaced by "." before destring is applied.
				* The list from ds also contains B itself.
				* NOTE(review): destring is expected to leave B as a string because
				* its values are not numeric - confirm in the log
				qui ds *, has(type string)
				foreach i of varlist `r(varlist)' {
					replace `i' = "." if `i' == "---"
					destring `i', replace
				}
				
				* Reshape to long: one row per (B, quarter), with the YYYYQq suffix
				* stored as a string in year_quarter and the value in year_
reshape long year_, i(B) j(year_quarter) string
				* Replace the BEA row descriptions by short names; these become
				* variable-name suffixes in the reshape wide below
				replace B = "Cons_FK" if B == "Consumption of fixed capital"
				replace B = "Sub" if B == "Less: Subsidies1"
				replace B = "NOS_NS" if B == "Net operating surplus"
				replace B = "Tax" if B == "Taxes on production and imports"
				replace B = "NOS_priv_NS" if B == "    Private enterprises"
				* Reshape to wide: one row per quarter, one year_<name> variable per series
				reshape wide year_, i(year_quarter) j(B) string
				
				*Rename: strip the year_ prefix from every variable matching year_*.
				* This also turns year_quarter into quarter, which is the
				* variable parsed further down.
				foreach i of varlist year_* {
					local newname = subinstr("`i'", "year_", "",.)
					rename `i' `newname'
				} 	
	
	* Only NOS_NS and NOS_priv_NS are kept from the BEA series
	drop Cons_FK  Sub Tax

* quarter holds strings of the form YYYYQq (the suffixes of the names built above)
gen quarter_pr = substr(quarter, 6, 1)         // Extract quarter number (1 character at position 6)
gen year = substr(quarter, 1, 4)          // Extract year (4 characters starting at position 1)

destring year, replace
destring quarter_pr, replace

* Quarterly date from year and quarter number, displayed in %tq format
gen date = yq(year,quarter_pr)
format date %tq
* Replace the string quarter by the numeric quarter number
drop quarter
rename quarter_pr  quarter

}

* Copies of the two NOS series under the names used after the merge.
* These are plain copies: the originals are kept and no unit conversion
* is applied here.
* The private series is referenced by its full name NOS_priv_NS. The
* shorter NOS_priv only resolves through variable abbreviation and stops
* working under "set varabbrev off" or once another NOS_priv* variable exists.
gen NOS_NS_dean = NOS_NS
gen NOS_priv_dean = NOS_priv_NS

save "`interDataPath'/NOS_us_quarterly.dta", replace

*Merge with yield data on (year, quarter); unmatched rows from either file are kept
merge 1:1 year quarter using "`interDataPath'/r_dataset_quarterly.dta"
drop _merge

* Generate a quarterly date variable from year and quarter.
* The date variable created before the merge exists only in the NOS file,
* so it is missing for rows that come from the yield data alone; qdate is
* built after the merge and therefore covers every row.
gen qdate = yq(year, quarter)

* Format the new variable as a quarterly date
format qdate %tq

sort qdate

save "`interDataPath'/us_quarterly_working.dta", replace
