import pandas as pd

# Load the housing dataset from the current working directory.
# NOTE(review): the pasted output lists an "Unnamed: 0" column, which usually
# means the CSV carries a saved row index -- consider index_col=0; confirm.
housing = pd.read_csv("housing.csv")

# Correlate numeric columns only: pandas >= 2.0 no longer drops non-numeric
# columns silently and raises on string columns instead.
corr_matrix = housing.select_dtypes(include="number").corr()

# Rank every numeric column by its correlation with the target, strongest first.
corr_matrix["median_house_value"].sort_values(ascending=False)

# 输出
median_house_value    1.000000
median_income         0.685211
total_rooms           0.138753
housing_median_age    0.116584
Unnamed: 0            0.070593
households            0.067639
total_bedrooms        0.051664
population           -0.025932
longitude            -0.051261
latitude             -0.140618
Name: median_house_value, dtype: float64

from pandas.plotting import scatter_matrix

# Columns to plot pairwise against each other.
attributes = [
    "median_house_value",
    "median_income",
    "total_rooms",
    "housing_median_age",
]
# Draw the grid of pairwise scatter plots for the selected columns.
scatter_matrix(frame=housing[attributes], figsize=(12, 8))

[视觉工程]如何确定列与列之间的相关性

# Scatter of income against house value; the low alpha lets dense regions
# show up darker than sparse ones.
housing.plot.scatter(
    x="median_income",
    y="median_house_value",
    alpha=0.1,
)

[视觉工程]如何确定列与列之间的相关性

# Derive per-household and per-room ratios from the raw count columns.
housing["rooms_per_household"] = housing["total_rooms"].div(housing["households"])
housing["bedrooms_per_room"] = housing["total_bedrooms"].div(housing["total_rooms"])
housing["population_per_household"] = housing["population"].div(housing["households"])

# Recompute the correlations on the frame as it now stands.
# Correlate numeric columns only: pandas >= 2.0 no longer drops non-numeric
# columns silently and raises on string columns instead.
corr_matrix = housing.select_dtypes(include="number").corr()

# Rank every numeric column by its correlation with the target, strongest first.
corr_matrix["median_house_value"].sort_values(ascending=False)

# 输出
median_house_value          1.000000
median_income               0.685211
rooms_per_household         0.146777
total_rooms                 0.138753
housing_median_age          0.116584
Unnamed: 0                  0.070593
households                  0.067639
total_bedrooms              0.051664
population                 -0.025932
population_per_household   -0.029628
longitude                  -0.051261
latitude                   -0.140618
bedrooms_per_room          -0.255092
Name: median_house_value, dtype: float64

相关文章:

  • 2022-01-16
  • 2021-07-12
  • 2022-01-14
猜你喜欢
  • 2021-06-29
相关资源
相似解决方案