%matplotlib inline
import matplotlib.pyplot as plt
import requests
import pandas as pd
from StringIO import StringIO
import json
import numpy as np
import statsmodels.formula.api as sm
cities = """Atlanta,GA,33.762909,-84.422675
Austin,TX,30.303936,-97.754355
Boston,MA,42.331960,-71.020173
Chicago,IL,41.837551,-87.681844
Cleveland,OH,41.478462,-81.679435
Denver,CO,39.761850,-104.881105
Las Vegas,NV,36.229214,-115.26008
Los Angeles,CA,34.019394,-118.410825
Miami,FL,25.775163,-80.208615
Minneapolis,MN,44.963324,-93.268320
Nashville,TN,36.171800,-86.785002
New Orleans,LA,30.053420,-89.934502
New York,NY,40.663619,-73.938589
Philadelphia,PA,40.009376,-75.133346
Phoenix,AZ,33.572154,-112.090132
Salt Lake City,UT,40.778996,-111.932630
San Francisco,CA,37.727239,-123.032229
Seattle,WA,47.620499,-122.350876
Washington,DC,38.904103,-77.017229"""
citiesDf = pd.read_csv(StringIO(cities), sep=',', header=None)
citiesDf.columns = ['city', 'state', 'latitude', 'longitude']
url = 'http://api.openhouseproject.co/api/property/?offset=0&limit=1500&close_to=({},{},{})'
distance = 100
fields = ['price', 'bathrooms', 'bedrooms', 'building_size']
dfMap = {}
for i in range(citiesDf.shape[0]):
row = citiesDf.iloc[i]
city = row.city
print 'City:', city
if not(dfMap.has_key(city)):
lat = row.latitude
lng = row.longitude
s = url.format(distance, lat, lng)
r = requests.get(s)
o = json.loads(r.content)
results = o['results']
properties = []
for result in results:
data = {}
for field in fields:
data[field] = result[field]
properties.append(data)
dfMap[city] = properties
City: Atlanta City: Austin City: Boston City: Chicago City: Cleveland City: Denver City: Las Vegas City: Los Angeles City: Miami City: Minneapolis City: Nashville City: New Orleans City: New York City: Philadelphia City: Phoenix City: Salt Lake City City: San Francisco City: Seattle City: Washington
for city in dfMap.keys():
df = pd.DataFrame(dfMap[city])
df['intercept'] = 1
model = sm.ols(formula="price ~ bedrooms + building_size + intercept", data=df).fit()
print("CITY: " + city)
print model.summary()
print "\n\n\n\n"
CITY: Seattle OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.414 Model: OLS Adj. R-squared: 0.413 Method: Least Squares F-statistic: 522.5 Date: Wed, 02 Nov 2016 Prob (F-statistic): 2.28e-172 Time: 18:06:08 Log-Likelihood: -19852. No. Observations: 1482 AIC: 3.971e+04 Df Residuals: 1479 BIC: 3.972e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 4.954e+04 6276.742 7.892 0.000 3.72e+04 6.19e+04 bedrooms 2.87e+04 4450.888 6.448 0.000 2e+04 3.74e+04 building_size 97.9198 4.322 22.656 0.000 89.442 106.398 intercept 4.954e+04 6276.742 7.892 0.000 3.72e+04 6.19e+04 ============================================================================== Omnibus: 1073.381 Durbin-Watson: 1.339 Prob(Omnibus): 0.000 Jarque-Bera (JB): 25458.610 Skew: 3.102 Prob(JB): 0.00 Kurtosis: 22.334 Cond. No. 1.91e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 1.45e-25. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: San Francisco OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.397 Model: OLS Adj. R-squared: 0.396 Method: Least Squares F-statistic: 491.0 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.46e-164 Time: 18:06:08 Log-Likelihood: -22817. No. Observations: 1495 AIC: 4.564e+04 Df Residuals: 1492 BIC: 4.566e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 9.646e+04 3.32e+04 2.910 0.004 3.14e+04 1.61e+05 bedrooms -4.246e+04 2.4e+04 -1.772 0.077 -8.95e+04 4539.866 building_size 779.2938 29.164 26.721 0.000 722.088 836.500 intercept 9.646e+04 3.32e+04 2.910 0.004 3.14e+04 1.61e+05 ============================================================================== Omnibus: 1492.900 Durbin-Watson: 1.061 Prob(Omnibus): 0.000 Jarque-Bera (JB): 97357.034 Skew: 4.673 Prob(JB): 0.00 Kurtosis: 41.413 Cond. No. 3.66e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 4.78e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Phoenix OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.392 Model: OLS Adj. R-squared: 0.391 Method: Least Squares F-statistic: 424.1 Date: Wed, 02 Nov 2016 Prob (F-statistic): 7.89e-143 Time: 18:06:08 Log-Likelihood: -17061. No. Observations: 1317 AIC: 3.413e+04 Df Residuals: 1314 BIC: 3.414e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 5091.6859 4660.862 1.092 0.275 -4051.859 1.42e+04 bedrooms 4235.3360 2329.635 1.818 0.069 -334.874 8805.546 building_size 101.1079 3.826 26.425 0.000 93.602 108.614 intercept 5091.6859 4660.862 1.092 0.275 -4051.859 1.42e+04 ============================================================================== Omnibus: 856.770 Durbin-Watson: 1.039 Prob(Omnibus): 0.000 Jarque-Bera (JB): 26640.247 Skew: 2.526 Prob(JB): 0.00 Kurtosis: 24.446 Cond. No. 7.41e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 1.14e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Chicago OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.070 Model: OLS Adj. R-squared: 0.067 Method: Least Squares F-statistic: 26.10 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.18e-11 Time: 18:06:08 Log-Likelihood: -9803.0 No. Observations: 695 AIC: 1.961e+04 Df Residuals: 692 BIC: 1.963e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 2.557e+04 2.23e+04 1.148 0.251 -1.82e+04 6.93e+04 bedrooms 4.166e+04 1.34e+04 3.109 0.002 1.54e+04 6.8e+04 building_size 57.5367 10.844 5.306 0.000 36.246 78.827 intercept 2.557e+04 2.23e+04 1.148 0.251 -1.82e+04 6.93e+04 ============================================================================== Omnibus: 581.727 Durbin-Watson: 0.633 Prob(Omnibus): 0.000 Jarque-Bera (JB): 13384.562 Skew: 3.699 Prob(JB): 0.00 Kurtosis: 23.186 Cond. No. 6.70e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 5.84e-27. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Miami OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.749 Model: OLS Adj. R-squared: 0.749 Method: Least Squares F-statistic: 2092. Date: Wed, 02 Nov 2016 Prob (F-statistic): 0.00 Time: 18:06:08 Log-Likelihood: -20766. No. Observations: 1403 AIC: 4.154e+04 Df Residuals: 1400 BIC: 4.155e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept -3.795e+05 1.36e+04 -27.888 0.000 -4.06e+05 -3.53e+05 bedrooms 3.203e+05 5515.840 58.062 0.000 3.09e+05 3.31e+05 building_size 198.8681 12.313 16.150 0.000 174.713 223.023 intercept -3.795e+05 1.36e+04 -27.888 0.000 -4.06e+05 -3.53e+05 ============================================================================== Omnibus: 1479.035 Durbin-Watson: 1.672 Prob(Omnibus): 0.000 Jarque-Bera (JB): 126993.425 Skew: 4.995 Prob(JB): 0.00 Kurtosis: 48.525 Cond. No. 2.92e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 6.68e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Boston OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.019 Model: OLS Adj. R-squared: 0.017 Method: Least Squares F-statistic: 14.17 Date: Wed, 02 Nov 2016 Prob (F-statistic): 8.01e-07 Time: 18:06:08 Log-Likelihood: -22039. No. Observations: 1497 AIC: 4.408e+04 Df Residuals: 1494 BIC: 4.410e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 2.389e+05 1.67e+04 14.326 0.000 2.06e+05 2.72e+05 bedrooms 2.277e+04 1.04e+04 2.193 0.028 2401.960 4.31e+04 building_size 23.7034 6.049 3.919 0.000 11.838 35.568 intercept 2.389e+05 1.67e+04 14.326 0.000 2.06e+05 2.72e+05 ============================================================================== Omnibus: 1396.564 Durbin-Watson: 1.411 Prob(Omnibus): 0.000 Jarque-Bera (JB): 64403.125 Skew: 4.305 Prob(JB): 0.00 Kurtosis: 33.958 Cond. No. 3.92e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 9.71e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Nashville OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.598 Model: OLS Adj. R-squared: 0.596 Method: Least Squares F-statistic: 312.9 Date: Wed, 02 Nov 2016 Prob (F-statistic): 5.28e-84 Time: 18:06:08 Log-Likelihood: -6182.1 No. Observations: 424 AIC: 1.237e+04 Df Residuals: 421 BIC: 1.238e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept -1.195e+05 4e+04 -2.989 0.003 -1.98e+05 -4.09e+04 bedrooms 7.913e+04 2.58e+04 3.062 0.002 2.83e+04 1.3e+05 building_size 247.8825 14.882 16.657 0.000 218.631 277.134 intercept -1.195e+05 4e+04 -2.989 0.003 -1.98e+05 -4.09e+04 ============================================================================== Omnibus: 238.081 Durbin-Watson: 1.467 Prob(Omnibus): 0.000 Jarque-Bera (JB): 2218.749 Skew: 2.246 Prob(JB): 0.00 Kurtosis: 13.267 Cond. No. 4.09e+18 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 4.45e-28. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Washington OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.243 Model: OLS Adj. R-squared: 0.242 Method: Least Squares F-statistic: 189.4 Date: Wed, 02 Nov 2016 Prob (F-statistic): 4.50e-72 Time: 18:06:08 Log-Likelihood: -16288. No. Observations: 1183 AIC: 3.258e+04 Df Residuals: 1180 BIC: 3.260e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept -9520.0959 1.2e+04 -0.794 0.427 -3.3e+04 1.4e+04 bedrooms 1.114e+04 8401.710 1.326 0.185 -5345.097 2.76e+04 building_size 138.5637 8.895 15.577 0.000 121.111 156.016 intercept -9520.0959 1.2e+04 -0.794 0.427 -3.3e+04 1.4e+04 ============================================================================== Omnibus: 1973.903 Durbin-Watson: 0.720 Prob(Omnibus): 0.000 Jarque-Bera (JB): 1282925.457 Skew: 10.688 Prob(JB): 0.00 Kurtosis: 162.907 Cond. No. 4.77e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 1.96e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Philadelphia OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.043 Model: OLS Adj. R-squared: 0.042 Method: Least Squares F-statistic: 28.06 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.20e-12 Time: 18:06:08 Log-Likelihood: -18058. No. Observations: 1247 AIC: 3.612e+04 Df Residuals: 1244 BIC: 3.614e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 5103.7935 2.06e+04 0.248 0.804 -3.52e+04 4.55e+04 bedrooms 5.661e+04 1.23e+04 4.597 0.000 3.25e+04 8.08e+04 building_size 57.0294 12.614 4.521 0.000 32.282 81.777 intercept 5103.7935 2.06e+04 0.248 0.804 -3.52e+04 4.55e+04 ============================================================================== Omnibus: 2006.943 Durbin-Watson: 1.215 Prob(Omnibus): 0.000 Jarque-Bera (JB): 1345255.345 Skew: 9.904 Prob(JB): 0.00 Kurtosis: 162.683 Cond. No. 7.51e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 7.02e-27. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Denver OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.141 Model: OLS Adj. R-squared: 0.140 Method: Least Squares F-statistic: 121.7 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.11e-49 Time: 18:06:08 Log-Likelihood: -22102. No. Observations: 1485 AIC: 4.421e+04 Df Residuals: 1482 BIC: 4.423e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 1.067e+05 2.5e+04 4.272 0.000 5.77e+04 1.56e+05 bedrooms 2.319e+04 1.84e+04 1.259 0.208 -1.29e+04 5.93e+04 building_size 151.1385 13.440 11.245 0.000 124.775 177.502 intercept 1.067e+05 2.5e+04 4.272 0.000 5.77e+04 1.56e+05 ============================================================================== Omnibus: 2996.628 Durbin-Watson: 1.562 Prob(Omnibus): 0.000 Jarque-Bera (JB): 8917349.471 Skew: 15.727 Prob(JB): 0.00 Kurtosis: 381.325 Cond. No. 5.28e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 4.21e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Las Vegas OLS Regression Results ============================================================================== Dep. Variable: price R-squared: -inf Model: OLS Adj. R-squared: nan Method: Least Squares F-statistic: nan Date: Wed, 02 Nov 2016 Prob (F-statistic): nan Time: 18:06:08 Log-Likelihood: 20.069 No. Observations: 1 AIC: -38.14 Df Residuals: 0 BIC: -40.14 Df Model: 0 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 0.3258 inf 0 nan nan nan bedrooms 1.3033 inf 0 nan nan nan building_size 902.5250 inf 0 nan nan nan intercept 0.3258 inf 0 nan nan nan ============================================================================== Omnibus: nan Durbin-Watson: 0.000 Prob(Omnibus): nan Jarque-Bera (JB): 0.375 Skew: 0.000 Prob(JB): 0.829 Kurtosis: 0.000 Cond. No. 1.00 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The input rank is higher than the number of observations. CITY: Salt Lake City OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.565 Model: OLS Adj. R-squared: 0.562 Method: Least Squares F-statistic: 181.8 Date: Wed, 02 Nov 2016 Prob (F-statistic): 2.50e-51 Time: 18:06:08 Log-Likelihood: -3699.8 No. Observations: 283 AIC: 7406. Df Residuals: 280 BIC: 7416. Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 5890.6616 1.25e+04 0.473 0.637 -1.86e+04 3.04e+04 bedrooms 9816.9175 6620.289 1.483 0.139 -3214.940 2.28e+04 building_size 168.0871 10.134 16.586 0.000 148.138 188.036 intercept 5890.6616 1.25e+04 0.473 0.637 -1.86e+04 3.04e+04 ============================================================================== Omnibus: 242.379 Durbin-Watson: 1.378 Prob(Omnibus): 0.000 Jarque-Bera (JB): 9003.112 Skew: 3.125 Prob(JB): 0.00 Kurtosis: 29.916 Cond. No. 8.32e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 1.41e-27. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Minneapolis OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.242 Model: OLS Adj. R-squared: 0.241 Method: Least Squares F-statistic: 232.4 Date: Wed, 02 Nov 2016 Prob (F-statistic): 2.43e-88 Time: 18:06:08 Log-Likelihood: -19274. No. Observations: 1460 AIC: 3.855e+04 Df Residuals: 1457 BIC: 3.857e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 9070.9843 6044.126 1.501 0.134 -2785.133 2.09e+04 bedrooms 6.666e+04 3507.031 19.007 0.000 5.98e+04 7.35e+04 building_size 174.5718 17.783 9.817 0.000 139.689 209.454 intercept 9070.9843 6044.126 1.501 0.134 -2785.133 2.09e+04 ============================================================================== Omnibus: 741.784 Durbin-Watson: 1.259 Prob(Omnibus): 0.000 Jarque-Bera (JB): 10954.992 Skew: 2.003 Prob(JB): 0.00 Kurtosis: 15.808 Cond. No. 2.35e+16 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 9.97e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Los Angeles OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.419 Model: OLS Adj. R-squared: 0.418 Method: Least Squares F-statistic: 528.0 Date: Wed, 02 Nov 2016 Prob (F-statistic): 2.20e-173 Time: 18:06:08 Log-Likelihood: -21369. No. Observations: 1467 AIC: 4.274e+04 Df Residuals: 1464 BIC: 4.276e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept -1.119e+05 2.15e+04 -5.193 0.000 -1.54e+05 -6.96e+04 bedrooms 1.489e+05 1.27e+04 11.752 0.000 1.24e+05 1.74e+05 building_size 259.3579 10.626 24.407 0.000 238.514 280.202 intercept -1.119e+05 2.15e+04 -5.193 0.000 -1.54e+05 -6.96e+04 ============================================================================== Omnibus: 1462.600 Durbin-Watson: 1.551 Prob(Omnibus): 0.000 Jarque-Bera (JB): 94763.364 Skew: 4.658 Prob(JB): 0.00 Kurtosis: 41.256 Cond. No. 2.99e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 7.15e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Cleveland OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.249 Model: OLS Adj. R-squared: 0.248 Method: Least Squares F-statistic: 247.2 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.76e-93 Time: 18:06:08 Log-Likelihood: -19076. No. Observations: 1498 AIC: 3.816e+04 Df Residuals: 1495 BIC: 3.817e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 9369.5884 4172.535 2.246 0.025 1184.944 1.76e+04 bedrooms 1.147e+04 2797.122 4.102 0.000 5986.303 1.7e+04 building_size 47.7234 2.472 19.307 0.000 42.875 52.572 intercept 9369.5884 4172.535 2.246 0.025 1184.944 1.76e+04 ============================================================================== Omnibus: 1324.877 Durbin-Watson: 1.062 Prob(Omnibus): 0.000 Jarque-Bera (JB): 91897.289 Skew: 3.769 Prob(JB): 0.00 Kurtosis: 40.623 Cond. No. 2.44e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 5.91e-26. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: New York OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.223 Model: OLS Adj. R-squared: 0.219 Method: Least Squares F-statistic: 52.37 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.03e-20 Time: 18:06:08 Log-Likelihood: -5570.8 No. Observations: 367 AIC: 1.115e+04 Df Residuals: 364 BIC: 1.116e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 4.302e+04 5.58e+04 0.771 0.441 -6.67e+04 1.53e+05 bedrooms 5.004e+04 3.07e+04 1.632 0.103 -1.02e+04 1.1e+05 building_size 416.3654 42.984 9.686 0.000 331.837 500.894 intercept 4.302e+04 5.58e+04 0.771 0.441 -6.67e+04 1.53e+05 ============================================================================== Omnibus: 613.196 Durbin-Watson: 1.788 Prob(Omnibus): 0.000 Jarque-Bera (JB): 257324.727 Skew: 9.287 Prob(JB): 0.00 Kurtosis: 131.385 Cond. No. 2.74e+18 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 1.05e-28. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Atlanta OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.258 Model: OLS Adj. R-squared: 0.256 Method: Least Squares F-statistic: 230.3 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.37e-86 Time: 18:06:08 Log-Likelihood: -17684. No. Observations: 1331 AIC: 3.537e+04 Df Residuals: 1328 BIC: 3.539e+04 Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept -4.466e+04 7051.345 -6.334 0.000 -5.85e+04 -3.08e+04 bedrooms 7.369e+04 4236.452 17.395 0.000 6.54e+04 8.2e+04 building_size 20.1355 3.292 6.117 0.000 13.677 26.593 intercept -4.466e+04 7051.345 -6.334 0.000 -5.85e+04 -3.08e+04 ============================================================================== Omnibus: 1959.891 Durbin-Watson: 1.379 Prob(Omnibus): 0.000 Jarque-Bera (JB): 840536.473 Skew: 8.445 Prob(JB): 0.00 Kurtosis: 124.946 Cond. No. 1.76e+17 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 1e-25. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: New Orleans OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.154 Model: OLS Adj. R-squared: 0.149 Method: Least Squares F-statistic: 27.11 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.53e-11 Time: 18:06:08 Log-Likelihood: -4079.0 No. Observations: 300 AIC: 8164. Df Residuals: 297 BIC: 8175. Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept 8.078e+04 1.82e+04 4.450 0.000 4.51e+04 1.16e+05 bedrooms -5.855e+04 1.46e+04 -4.011 0.000 -8.73e+04 -2.98e+04 building_size 139.0080 19.311 7.198 0.000 101.005 177.011 intercept 8.078e+04 1.82e+04 4.450 0.000 4.51e+04 1.16e+05 ============================================================================== Omnibus: 353.767 Durbin-Watson: 0.492 Prob(Omnibus): 0.000 Jarque-Bera (JB): 15540.756 Skew: 5.365 Prob(JB): 0.00 Kurtosis: 36.588 Cond. No. 1.29e+19 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 7.49e-30. This might indicate that there are strong multicollinearity problems or that the design matrix is singular. CITY: Austin OLS Regression Results ============================================================================== Dep. Variable: price R-squared: 0.766 Model: OLS Adj. R-squared: 0.757 Method: Least Squares F-statistic: 81.79 Date: Wed, 02 Nov 2016 Prob (F-statistic): 1.72e-16 Time: 18:06:08 Log-Likelihood: -693.24 No. Observations: 53 AIC: 1392. Df Residuals: 50 BIC: 1398. Df Model: 2 Covariance Type: nonrobust ================================================================================= coef std err t P>|t| [95.0% Conf. Int.] --------------------------------------------------------------------------------- Intercept -1.426e+04 4.2e+04 -0.339 0.736 -9.87e+04 7.01e+04 bedrooms -6.356e+04 3.27e+04 -1.947 0.057 -1.29e+05 2026.475 building_size 255.4130 25.686 9.944 0.000 203.821 307.005 intercept -1.426e+04 4.2e+04 -0.339 0.736 -9.87e+04 7.01e+04 ============================================================================== Omnibus: 7.991 Durbin-Watson: 2.194 Prob(Omnibus): 0.018 Jarque-Bera (JB): 7.117 Skew: 0.828 Prob(JB): 0.0285 Kurtosis: 3.692 Cond. No. 2.81e+19 ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified. [2] The smallest eigenvalue is 5.97e-31. This might indicate that there are strong multicollinearity problems or that the design matrix is singular.
As you can see above, the fits need more work to remove multi-colinearity and address cases where some coefficients have low confidence. But for a quick analysis, let\'s continue.
params = []
for city in dfMap.keys():
df = pd.DataFrame(dfMap[city])
df['intercept'] = 1
model = sm.ols(formula="price ~ bedrooms + building_size + intercept", data=df).fit()
d = dict(model.params)
d['rsquared'] = model.rsquared
d['city'] = city
params.append(d)
res = pd.DataFrame(params)
res.sort('building_size', inplace=True)
res.index = np.arange(res.shape[0])
/usr/local/lib/python2.7/site-packages/ipykernel/__main__.py:2: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....) from ipykernel import kernelapp as app
plt.figure(figsize=(4,6))
plt.barh(res.index, res['building_size'])
plt.yticks(res.index + 0.4, res['city'])
plt.xlabel('Coeficient of building_size')
plt.plot()
[]
Your trusted podcast, centered on data science, machine learning, and artificial intelligence.