{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "330fca3d-f8c0-49fb-aee0-8fbaf2fe55dc", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import statsmodels.formula.api as smf\n", "pd.set_option('display.max_colwidth', None)\n", "from statsmodels.iolib.summary2 import summary_col \n" ] }, { "cell_type": "code", "execution_count": 2, "id": "5a47dc9c-1011-4f85-8d5c-fa7875c21eb9", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "# Ignore warnings by category\n", "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n", "# Ignore warnings by message\n", "warnings.filterwarnings(\"ignore\", message=\".*\")" ] }, { "cell_type": "code", "execution_count": 3, "id": "41f17aab-1bf5-4319-bd2c-eabf08b38333", "metadata": {}, "outputs": [], "source": [ "bank_tract = pd.read_csv(\"../input_data_clean/bank_tract_clean_WITH_CENSUS.csv\")\n", "CA_df = bank_tract[bank_tract[\"state\"] == 6]\n", "AZ_df = bank_tract[bank_tract[\"state\"] == 4]\n", "CA_df['which_bank'] = CA_df['which_bank'].str.replace(' ', '')\n", "AZ_df['which_bank'] = AZ_df['which_bank'].str.replace(' ', '')" ] }, { "cell_type": "code", "execution_count": 14, "id": "3ff524a4-96d8-48f4-8a65-46936058d821", "metadata": {}, "outputs": [], "source": [ "# Calculate the hispanic rate and whether it's above the median\n", "CA_df['hisp_rate'] = (CA_df['HispanicLatinoPop'] / CA_df['Tot.Pop']) * 100\n", "CA_df['hisp_over_med'] = CA_df['hisp_rate'] > np.median(CA_df['hisp_rate'].dropna())\n", "CA_df['hisp_over_med'] = CA_df['hisp_over_med'].astype(int)\n", "\n", "# Calculate the log of number of applications\n", "CA_df['log_num_apps'] = np.log(CA_df['num_applications'])" ] }, { "cell_type": "code", "execution_count": 7, "id": "2399965e-d2af-4060-ab43-a00a2a1eec21", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1454" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# AZ_df.shape # 1792 rows\n", "AZ_df['census_tract'].nunique()" ] }, { "cell_type": "code", "execution_count": 23, "id": "e9dfefcf-d6fb-4015-a0a2-495c0ccb09cc", "metadata": {}, "outputs": [], "source": [ "def order_front(df, to_front):\n", " '''\n", " Moves columns in to_front to left of df.\n", " '''\n", " cols = list(df.columns)\n", " for c in to_front:\n", " cols.pop(cols.index(c))\n", " cols = to_front + cols\n", " return df[cols]\n", "\n", "AZ_df = order_front(AZ_df,['which_bank','census_tract'])" ] }, { "cell_type": "code", "execution_count": 24, "id": "4c1329cd-a29b-43aa-bad2-5da7df774161", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
which_bankcensus_tractNAMETot.PopTot.WhitePopTot.BlackPopTot.AmericIndianPopTot.AsianPopTot.NativeHawaiianPacificPopTot.OtherRaceAlonePop...mean_approved_interest_ratenum_approved_loansnum_applicationssum_all_loan_amountmedian_all_incomedenial_rateavg_approved_loan_sizehisp_ratehisp_over_medlog_num_apps
12045AllOtherBanks4001945001Census Tract 9450.01, Apache County, Arizona4128.0157.018.03897.00.00.017.0...2.56250023825000.0125.00.666667135000.0000002.88275201.098612
12046AllOtherBanks4001945100Census Tract 9451, Apache County, Arizona2748.0117.06.02563.00.00.047.0...3.37500011225000.0126.01.000000225000.0000004.62154300.000000
12047AllOtherBanks4001970200Census Tract 9702, Apache County, Arizona4552.04006.079.0218.078.00.024.0...3.25088026427310000.061.00.619048180769.23076913.62038703.737670
12048AllOtherBanks4001970300Census Tract 9703, Apache County, Arizona4007.03122.0137.0221.04.00.0159.0...3.08333335815000.085.00.600000108333.33333342.42575511.609438
12049BankofWest4001970300Census Tract 9703, Apache County, Arizona4007.03122.0137.0221.04.00.0159.0...2.62500024360000.046.00.500000100000.00000042.42575511.386294
12050AllOtherBanks4001970501Census Tract 9705.01, Apache County, Arizona5127.04829.07.0139.00.00.0112.0...2.99809521324990000.056.50.656250172142.85714320.34328103.465736
12051BankofWest4001970501Census Tract 9705.01, Apache County, Arizona5127.04829.07.0139.00.00.0112.0...3.8825002101050000.050.00.200000125000.00000020.34328102.302585
12052AllOtherBanks4001970502Census Tract 9705.02, Apache County, Arizona3939.03195.00.0654.00.09.070.0...2.979746596816820000.088.00.867647255338.98305111.14496104.219508
12053BankofWest4001970502Census Tract 9705.02, Apache County, Arizona3939.03195.00.0654.00.09.070.0...3.21250024320000.084.00.500000105000.00000011.14496101.386294
12054AllOtherBanks4003000100Census Tract 1, Cochise County, Arizona1705.01637.011.016.00.00.02.0...3.125000271295000.039.00.285714265000.00000032.78592411.945910
12055AllOtherBanks4003000201Census Tract 2.01, Cochise County, Arizona3449.03305.00.041.00.00.088.0...2.92888918233765000.085.00.782609158333.33333338.09799913.135494
12056BankofWest4003000201Census Tract 2.01, Cochise County, Arizona3449.03305.00.041.00.00.088.0...2.75000012690000.0121.50.500000345000.00000038.09799910.693147
12057AllOtherBanks4003000202Census Tract 2.02, Cochise County, Arizona3686.03316.096.041.022.026.0105.0...3.13840025314975000.051.50.806452165400.00000057.46066213.433987
12058AllOtherBanks4003000203Census Tract 2.03, Cochise County, Arizona2771.02538.00.039.00.00.053.0...2.94200035397205000.086.00.897436174714.28571427.53518613.663562
12059AllOtherBanks4003000301Census Tract 3.01, Cochise County, Arizona3668.03417.036.018.020.00.059.0...3.10721138477655000.049.00.808511170526.31578930.83424213.850148
12060AllOtherBanks4003000302Census Tract 3.02, Cochise County, Arizona4009.03724.037.0125.054.00.00.0...2.981591455610810000.071.50.803571208333.33333315.54003504.025352
12061AllOtherBanks4003000303Census Tract 3.03, Cochise County, Arizona3488.03285.00.00.00.0144.00.0...3.046348465610720000.055.00.821429201739.13043512.55733904.025352
12062AllOtherBanks4003000400Census Tract 4, Cochise County, Arizona1995.01889.00.00.00.00.012.0...3.11760924305350000.065.00.800000183333.33333318.09523803.401197
12063AllOtherBanks4003000500Census Tract 5, Cochise County, Arizona7594.06407.0775.0256.09.022.018.0...2.73791712223400000.054.00.545455191666.66666738.10903313.091042
12064AllOtherBanks4003000600Census Tract 6, Cochise County, Arizona3575.03263.076.031.00.00.0184.0...3.0825006101640000.043.00.600000145000.00000075.86014012.302585
\n", "

20 rows × 467 columns

\n", "
" ], "text/plain": [ " which_bank census_tract \\\n", "12045 AllOtherBanks 4001945001 \n", "12046 AllOtherBanks 4001945100 \n", "12047 AllOtherBanks 4001970200 \n", "12048 AllOtherBanks 4001970300 \n", "12049 BankofWest 4001970300 \n", "12050 AllOtherBanks 4001970501 \n", "12051 BankofWest 4001970501 \n", "12052 AllOtherBanks 4001970502 \n", "12053 BankofWest 4001970502 \n", "12054 AllOtherBanks 4003000100 \n", "12055 AllOtherBanks 4003000201 \n", "12056 BankofWest 4003000201 \n", "12057 AllOtherBanks 4003000202 \n", "12058 AllOtherBanks 4003000203 \n", "12059 AllOtherBanks 4003000301 \n", "12060 AllOtherBanks 4003000302 \n", "12061 AllOtherBanks 4003000303 \n", "12062 AllOtherBanks 4003000400 \n", "12063 AllOtherBanks 4003000500 \n", "12064 AllOtherBanks 4003000600 \n", "\n", " NAME Tot.Pop Tot.WhitePop \\\n", "12045 Census Tract 9450.01, Apache County, Arizona 4128.0 157.0 \n", "12046 Census Tract 9451, Apache County, Arizona 2748.0 117.0 \n", "12047 Census Tract 9702, Apache County, Arizona 4552.0 4006.0 \n", "12048 Census Tract 9703, Apache County, Arizona 4007.0 3122.0 \n", "12049 Census Tract 9703, Apache County, Arizona 4007.0 3122.0 \n", "12050 Census Tract 9705.01, Apache County, Arizona 5127.0 4829.0 \n", "12051 Census Tract 9705.01, Apache County, Arizona 5127.0 4829.0 \n", "12052 Census Tract 9705.02, Apache County, Arizona 3939.0 3195.0 \n", "12053 Census Tract 9705.02, Apache County, Arizona 3939.0 3195.0 \n", "12054 Census Tract 1, Cochise County, Arizona 1705.0 1637.0 \n", "12055 Census Tract 2.01, Cochise County, Arizona 3449.0 3305.0 \n", "12056 Census Tract 2.01, Cochise County, Arizona 3449.0 3305.0 \n", "12057 Census Tract 2.02, Cochise County, Arizona 3686.0 3316.0 \n", "12058 Census Tract 2.03, Cochise County, Arizona 2771.0 2538.0 \n", "12059 Census Tract 3.01, Cochise County, Arizona 3668.0 3417.0 \n", "12060 Census Tract 3.02, Cochise County, Arizona 4009.0 3724.0 \n", "12061 Census Tract 3.03, Cochise County, Arizona 3488.0 3285.0 \n", "12062 Census Tract 4, Cochise County, Arizona 1995.0 1889.0 \n", "12063 Census Tract 5, Cochise County, Arizona 7594.0 6407.0 \n", "12064 Census Tract 6, Cochise County, Arizona 3575.0 3263.0 \n", "\n", " Tot.BlackPop Tot.AmericIndianPop Tot.AsianPop \\\n", "12045 18.0 3897.0 0.0 \n", "12046 6.0 2563.0 0.0 \n", "12047 79.0 218.0 78.0 \n", "12048 137.0 221.0 4.0 \n", "12049 137.0 221.0 4.0 \n", "12050 7.0 139.0 0.0 \n", "12051 7.0 139.0 0.0 \n", "12052 0.0 654.0 0.0 \n", "12053 0.0 654.0 0.0 \n", "12054 11.0 16.0 0.0 \n", "12055 0.0 41.0 0.0 \n", "12056 0.0 41.0 0.0 \n", "12057 96.0 41.0 22.0 \n", "12058 0.0 39.0 0.0 \n", "12059 36.0 18.0 20.0 \n", "12060 37.0 125.0 54.0 \n", "12061 0.0 0.0 0.0 \n", "12062 0.0 0.0 0.0 \n", "12063 775.0 256.0 9.0 \n", "12064 76.0 31.0 0.0 \n", "\n", " Tot.NativeHawaiianPacificPop Tot.OtherRaceAlonePop ... \\\n", "12045 0.0 17.0 ... \n", "12046 0.0 47.0 ... \n", "12047 0.0 24.0 ... \n", "12048 0.0 159.0 ... \n", "12049 0.0 159.0 ... \n", "12050 0.0 112.0 ... \n", "12051 0.0 112.0 ... \n", "12052 9.0 70.0 ... \n", "12053 9.0 70.0 ... \n", "12054 0.0 2.0 ... \n", "12055 0.0 88.0 ... \n", "12056 0.0 88.0 ... \n", "12057 26.0 105.0 ... \n", "12058 0.0 53.0 ... \n", "12059 0.0 59.0 ... \n", "12060 0.0 0.0 ... \n", "12061 144.0 0.0 ... \n", "12062 0.0 12.0 ... \n", "12063 22.0 18.0 ... \n", "12064 0.0 184.0 ... \n", "\n", " mean_approved_interest_rate num_approved_loans num_applications \\\n", "12045 2.562500 2 3 \n", "12046 3.375000 1 1 \n", "12047 3.250880 26 42 \n", "12048 3.083333 3 5 \n", "12049 2.625000 2 4 \n", "12050 2.998095 21 32 \n", "12051 3.882500 2 10 \n", "12052 2.979746 59 68 \n", "12053 3.212500 2 4 \n", "12054 3.125000 2 7 \n", "12055 2.928889 18 23 \n", "12056 2.750000 1 2 \n", "12057 3.138400 25 31 \n", "12058 2.942000 35 39 \n", "12059 3.107211 38 47 \n", "12060 2.981591 45 56 \n", "12061 3.046348 46 56 \n", "12062 3.117609 24 30 \n", "12063 2.737917 12 22 \n", "12064 3.082500 6 10 \n", "\n", " sum_all_loan_amount median_all_income denial_rate \\\n", "12045 825000.0 125.0 0.666667 \n", "12046 225000.0 126.0 1.000000 \n", "12047 7310000.0 61.0 0.619048 \n", "12048 815000.0 85.0 0.600000 \n", "12049 360000.0 46.0 0.500000 \n", "12050 4990000.0 56.5 0.656250 \n", "12051 1050000.0 50.0 0.200000 \n", "12052 16820000.0 88.0 0.867647 \n", "12053 320000.0 84.0 0.500000 \n", "12054 1295000.0 39.0 0.285714 \n", "12055 3765000.0 85.0 0.782609 \n", "12056 690000.0 121.5 0.500000 \n", "12057 4975000.0 51.5 0.806452 \n", "12058 7205000.0 86.0 0.897436 \n", "12059 7655000.0 49.0 0.808511 \n", "12060 10810000.0 71.5 0.803571 \n", "12061 10720000.0 55.0 0.821429 \n", "12062 5350000.0 65.0 0.800000 \n", "12063 3400000.0 54.0 0.545455 \n", "12064 1640000.0 43.0 0.600000 \n", "\n", " avg_approved_loan_size hisp_rate hisp_over_med log_num_apps \n", "12045 135000.000000 2.882752 0 1.098612 \n", "12046 225000.000000 4.621543 0 0.000000 \n", "12047 180769.230769 13.620387 0 3.737670 \n", "12048 108333.333333 42.425755 1 1.609438 \n", "12049 100000.000000 42.425755 1 1.386294 \n", "12050 172142.857143 20.343281 0 3.465736 \n", "12051 125000.000000 20.343281 0 2.302585 \n", "12052 255338.983051 11.144961 0 4.219508 \n", "12053 105000.000000 11.144961 0 1.386294 \n", "12054 265000.000000 32.785924 1 1.945910 \n", "12055 158333.333333 38.097999 1 3.135494 \n", "12056 345000.000000 38.097999 1 0.693147 \n", "12057 165400.000000 57.460662 1 3.433987 \n", "12058 174714.285714 27.535186 1 3.663562 \n", "12059 170526.315789 30.834242 1 3.850148 \n", "12060 208333.333333 15.540035 0 4.025352 \n", "12061 201739.130435 12.557339 0 4.025352 \n", "12062 183333.333333 18.095238 0 3.401197 \n", "12063 191666.666667 38.109033 1 3.091042 \n", "12064 145000.000000 75.860140 1 2.302585 \n", "\n", "[20 rows x 467 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "AZ_df.sort_values(['census_tract','which_bank']).head(20)" ] }, { "cell_type": "code", "execution_count": 10, "id": "c8107df0-ec91-4d5d-92b3-febed6905e90", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1454.000000\n", "mean 1.232462\n", "std 0.422547\n", "min 1.000000\n", "25% 1.000000\n", "50% 1.000000\n", "75% 1.000000\n", "max 2.000000\n", "Name: which_bank, dtype: float64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "AZ_df.groupby('census_tract')['which_bank'].count().describe()" ] }, { "cell_type": "code", "execution_count": 17, "id": "907e61fb-c7e5-4296-ac41-66f479e0fdee", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 1792.000000\n", "mean 30.141316\n", "std 23.097044\n", "min 0.305810\n", "25% 12.946783\n", "50% 22.662821\n", "75% 42.184397\n", "max 100.000000\n", "Name: hisp_rate, dtype: float64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "AZ_df['hisp_rate'].describe()" ] }, { "cell_type": "code", "execution_count": 18, "id": "bac4b67c-5763-4531-af13-027566036891", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-2.9648999999999996" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "-3.03+.00217*30" ] }, { "cell_type": "markdown", "id": "b018ef48-fb77-47de-adac-c6a144c19d4a", "metadata": {}, "source": [ "## Linear Regression - CA" ] }, { "cell_type": "code", "execution_count": 5, "id": "fd423330-5627-4db1-a6f0-8c396f6f84c5", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "=======================================================================\n", " Model 1 Model 2 Model 3 Model 4 \n", "-----------------------------------------------------------------------\n", "Intercept 0.8498*** 0.8427*** 4.2854*** 4.0568*** \n", " (0.0028) (0.0024) (0.0136) (0.0119) \n", "hisp_rate -0.0005*** -0.0125*** \n", " (0.0001) (0.0003) \n", "hisp_over_med -0.0213*** -0.4600***\n", " (0.0029) (0.0146) \n", "C(which_bank)[T.BankofWest] -0.0466*** -0.0453*** -2.9654*** -2.9225***\n", " (0.0031) (0.0031) (0.0148) (0.0154) \n", "R-squared 0.0221 0.0206 0.7705 0.7512 \n", "R-squared Adj. 0.0220 0.0204 0.7704 0.7512 \n", "=======================================================================\n", "Standard errors in parentheses.\n", "* p<.1, ** p<.05, ***p<.01\n" ] } ], "source": [ "# Calculate the hispanic rate and whether it's above the median\n", "CA_df['hisp_rate'] = (CA_df['HispanicLatinoPop'] / CA_df['Tot.Pop']) * 100\n", "CA_df['hisp_over_med'] = CA_df['hisp_rate'] > np.median(CA_df['hisp_rate'].dropna())\n", "CA_df['hisp_over_med'] = CA_df['hisp_over_med'].astype(int)\n", "\n", "# Calculate the log of number of applications\n", "CA_df['log_num_apps'] = np.log(CA_df['num_applications'])\n", "\n", "# Build initial models\n", "# Fit the linear regression models\n", "model1 = smf.ols('denial_rate ~ hisp_rate + C(which_bank)', data=CA_df).fit()\n", "model2 = smf.ols('denial_rate ~ hisp_over_med + C(which_bank)', data=CA_df).fit()\n", "model3 = smf.ols('log_num_apps ~ hisp_rate + C(which_bank)', data=CA_df).fit()\n", "model4 = smf.ols('log_num_apps ~ hisp_over_med + C(which_bank)', data=CA_df).fit()\n", "\n", "# Create a list of model results\n", "models = [model1, model2, model3, model4]\n", "\n", "# Generate the table of regression results\n", "table = summary_col(models, \n", " model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'], \n", " regressor_order=['Intercept', 'hisp_rate', 'hisp_over_med', 'C(which_bank)[T.BankofWest]'], \n", " float_format='%0.4f', \n", " stars=True)\n", "\n", "# Display the table\n", "print(table)\n" ] }, { "cell_type": "markdown", "id": "bcfbb5b1-03a2-489b-a61f-1fabc93d6e1b", "metadata": {}, "source": [ "## Linear Regression - AZ" ] }, { "cell_type": "code", "execution_count": 16, "id": "cfae0221-655e-4ba3-9deb-e73022b3d188", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "===========================================================================\n", " Model 1 Model 2 Model 3 Model 4 \n", "---------------------------------------------------------------------------\n", "Intercept 0.85239*** 0.84733*** 4.41751*** 4.26027*** \n", " (0.00501) (0.00434) (0.03390) (0.03007) \n", "hisp_rate -0.00064*** -0.01104*** \n", " (0.00012) (0.00084) \n", "hisp_over_med -0.02891*** -0.36179***\n", " (0.00572) (0.03966) \n", "C(which_bank)[T.BankofWest] 0.04171*** 0.04256*** -3.69478*** -3.66678***\n", " (0.00732) (0.00731) (0.04955) (0.05063) \n", "R-squared 0.03539 0.03474 0.75766 0.74612 \n", "R-squared Adj. 0.03431 0.03367 0.75739 0.74584 \n", "===========================================================================\n", "Standard errors in parentheses.\n", "* p<.1, ** p<.05, ***p<.01\n" ] } ], "source": [ "import statsmodels.formula.api as smf\n", "from statsmodels.iolib.summary2 import summary_col\n", "import numpy as np\n", "import pandas as pd\n", "\n", "# Calculate the hispanic rate and whether it's above the median\n", "AZ_df['hisp_rate'] = (AZ_df['HispanicLatinoPop'] / AZ_df['Tot.Pop']) * 100\n", "AZ_df['hisp_over_med'] = AZ_df['hisp_rate'] > np.median(AZ_df['hisp_rate'].dropna())\n", "AZ_df['hisp_over_med'] = AZ_df['hisp_over_med'].astype(int)\n", "\n", "# Calculate the log of number of applications\n", "AZ_df['log_num_apps'] = np.log(AZ_df['num_applications'])\n", "\n", "# Fit the linear regression models\n", "model1 = smf.ols('denial_rate ~ hisp_rate + C(which_bank)', data=AZ_df).fit()\n", "model2 = smf.ols('denial_rate ~ hisp_over_med + C(which_bank)', data=AZ_df).fit()\n", "model3 = smf.ols('log_num_apps ~ hisp_rate + C(which_bank)', data=AZ_df).fit()\n", "model4 = smf.ols('log_num_apps ~ hisp_over_med + C(which_bank)', data=AZ_df).fit()\n", "\n", "# Combine the regression results into a single table\n", "results_table = summary_col([model1, model2, model3, model4], \n", " model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],\n", " regressor_order=['Intercept', 'hisp_rate', 'hisp_over_med', 'C(which_bank)[T.BankofWest]'],\n", " float_format='%0.5f',\n", " stars=True,\n", " drop_omitted=True)\n", "\n", "# Display the table\n", "print(results_table)\n", "\n" ] }, { "cell_type": "markdown", "id": "6e5fb3f4-9eb5-4a32-afb1-01bfd158a1cb", "metadata": {}, "source": [ "## Linear Regression With Interaction Terms - CA" ] }, { "cell_type": "code", "execution_count": 7, "id": "6411a2bd-66e2-4c96-8937-287b9fa885f8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "======================================================================================\n", " Model 1 Model 2 Model 3 Model 4 \n", "--------------------------------------------------------------------------------------\n", "Intercept 0.87515*** 0.86009*** 4.30888*** 4.06668*** \n", " (0.00309) (0.00261) (0.01510) (0.01321) \n", "R-squared 0.04997 0.03993 0.77070 0.75130 \n", "R-squared Adj. 0.04974 0.03969 0.77065 0.75124 \n", "hisp_over_med -0.05366*** -0.47836***\n", " (0.00355) (0.01800) \n", "hisp_over_med:which_bank[T.BankofWest] 0.09464*** 0.05368* \n", " (0.00607) (0.03078) \n", "hisp_rate -0.00115*** -0.01309*** \n", " (0.00007) (0.00033) \n", "hisp_rate:which_bank[T.BankofWest] 0.00235*** 0.00217*** \n", " (0.00012) (0.00061) \n", "which_bank[T.BankofWest] -0.12342*** -0.08930*** -3.03644*** -2.94744***\n", " (0.00509) (0.00414) (0.02489) (0.02098) \n", "N 12043 12045 12043 12045 \n", "R2 0.05 0.04 0.77 0.75 \n", "======================================================================================\n", "Standard errors in parentheses.\n", "* p<.1, ** p<.05, ***p<.01\n" ] } ], "source": [ "# Build initial models\n", "# Fit the linear regression models\n", "model1 = smf.ols('denial_rate ~ hisp_rate * which_bank', data=CA_df).fit()\n", "model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank', data=CA_df).fit()\n", "model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank', data=CA_df).fit()\n", "model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank', data=CA_df).fit()\n", "\n", "# Create a summary table of the regression models\n", "table = summary_col([model1, model2, model3, model4],\n", " model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],\n", " float_format='%.5f',\n", " stars=True,\n", " info_dict={'N': lambda x: \"{0:d}\".format(int(x.nobs)),\n", " 'R2': lambda x: \"{:.2f}\".format(x.rsquared)})\n", "\n", "# Rename the table columns\n", "table.columns = ['Dep. Variable', 'Model 1', 'Model 2', 'Model 3', 'Model 4']\n", "\n", "# Display the table\n", "print(table)" ] }, { "cell_type": "markdown", "id": "3909d18a-c167-4304-a803-ebbaab6ac6e4", "metadata": {}, "source": [ "## Linear Regression with Interaction Terms - AZ" ] }, { "cell_type": "code", "execution_count": 8, "id": "374bda66-6244-415b-9b35-e502c74c637a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "======================================================================================\n", " Model 1 Model 2 Model 3 Model 4 \n", "--------------------------------------------------------------------------------------\n", "Intercept 0.85588*** 0.84835*** 4.46175*** 4.30203*** \n", " (0.00524) (0.00457) (0.03538) (0.03148) \n", "hisp_rate -0.00075*** -0.01245*** \n", " (0.00013) (0.00090) \n", "hisp_over_med -0.03088*** -0.44237***\n", " (0.00634) (0.04373) \n", "which_bank[T.BankofWest] 0.02098* 0.03791*** -3.95744*** -3.85692***\n", " (0.01186) (0.00974) (0.08003) (0.06717) \n", "hisp_rate:which_bank[T.BankofWest] 0.00079** 0.01000*** \n", " (0.00036) (0.00240) \n", "hisp_over_med:which_bank[T.BankofWest] 0.01064 0.43483*** \n", " (0.01473) (0.10157) \n", "R-squared 0.03804 0.03503 0.75999 0.74870 \n", "R-squared Adj. 0.03643 0.03341 0.75959 0.74827 \n", "R-squared 0.04 0.04 0.76 0.75 \n", "======================================================================================\n", "Standard errors in parentheses.\n", "* p<.1, ** p<.05, ***p<.01\n" ] } ], "source": [ "# Build initial models\n", "# Fit the linear regression models\n", "model1 = smf.ols('denial_rate ~ hisp_rate * which_bank', data=AZ_df).fit()\n", "model2 = smf.ols('denial_rate ~ hisp_over_med * which_bank', data=AZ_df).fit()\n", "model3 = smf.ols('log_num_apps ~ hisp_rate * which_bank', data=AZ_df).fit()\n", "model4 = smf.ols('log_num_apps ~ hisp_over_med * which_bank', data=AZ_df).fit()\n", "\n", "# Generate the summary table\n", "table = summary_col([model1, model2, model3, model4],\n", " model_names=['Model 1', 'Model 2', 'Model 3', 'Model 4'],\n", " regressor_order=['Intercept', 'hisp_rate', 'hisp_over_med', 'which_bank[T.BankofWest]', 'hisp_rate:which_bank[T.BankofWest]', 'hisp_over_med:which_bank[T.BankofWest]'],\n", " float_format='%.5f',\n", " stars=True,\n", " info_dict={'R-squared': lambda x: \"{:.2f}\".format(x.rsquared)}\n", " )\n", "\n", "# Display the summary table\n", "print(table)" ] }, { "cell_type": "code", "execution_count": null, "id": "6c7716ee-a9cd-4fc0-a318-f71d13f09582", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "ff9b9045-cbd2-4d79-a5c1-c3e6fdea8f03", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }