1037 lines
1.3 MiB
Plaintext
1037 lines
1.3 MiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "407d4131-b3c5-4b66-a214-baf5ee98aae3",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Install"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"id": "cd0826ea-da42-4fd3-90a9-abc13ed05203",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Requirement already satisfied: scikit-learn in /opt/conda/lib/python3.11/site-packages (1.4.0)\n",
|
|||
|
"Requirement already satisfied: numpy<2.0,>=1.19.5 in /opt/conda/lib/python3.11/site-packages (from scikit-learn) (1.26.3)\n",
|
|||
|
"Requirement already satisfied: scipy>=1.6.0 in /opt/conda/lib/python3.11/site-packages (from scikit-learn) (1.12.0)\n",
|
|||
|
"Requirement already satisfied: joblib>=1.2.0 in /opt/conda/lib/python3.11/site-packages (from scikit-learn) (1.3.2)\n",
|
|||
|
"Requirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.11/site-packages (from scikit-learn) (3.2.0)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"!pip install scikit-learn"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "ad1b4816-64f8-427f-8af4-b862d7561bfb",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Import california housing dataset\n",
|
|||
|
"\n",
|
|||
|
"Load the dataset and display it. `fetch_california_housing()` returns a dict-like scikit-learn `Bunch` object, not JSON."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"id": "9a456fe9-2741-43e3-b3d1-f8f98595a466",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"{'data': array([[ 8.3252 , 41. , 6.98412698, ..., 2.55555556,\n",
|
|||
|
" 37.88 , -122.23 ],\n",
|
|||
|
" [ 8.3014 , 21. , 6.23813708, ..., 2.10984183,\n",
|
|||
|
" 37.86 , -122.22 ],\n",
|
|||
|
" [ 7.2574 , 52. , 8.28813559, ..., 2.80225989,\n",
|
|||
|
" 37.85 , -122.24 ],\n",
|
|||
|
" ...,\n",
|
|||
|
" [ 1.7 , 17. , 5.20554273, ..., 2.3256351 ,\n",
|
|||
|
" 39.43 , -121.22 ],\n",
|
|||
|
" [ 1.8672 , 18. , 5.32951289, ..., 2.12320917,\n",
|
|||
|
" 39.43 , -121.32 ],\n",
|
|||
|
" [ 2.3886 , 16. , 5.25471698, ..., 2.61698113,\n",
|
|||
|
" 39.37 , -121.24 ]]),\n",
|
|||
|
" 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),\n",
|
|||
|
" 'frame': None,\n",
|
|||
|
" 'target_names': ['MedHouseVal'],\n",
|
|||
|
" 'feature_names': ['MedInc',\n",
|
|||
|
" 'HouseAge',\n",
|
|||
|
" 'AveRooms',\n",
|
|||
|
" 'AveBedrms',\n",
|
|||
|
" 'Population',\n",
|
|||
|
" 'AveOccup',\n",
|
|||
|
" 'Latitude',\n",
|
|||
|
" 'Longitude'],\n",
|
|||
|
" 'DESCR': '.. _california_housing_dataset:\\n\\nCalifornia Housing dataset\\n--------------------------\\n\\n**Data Set Characteristics:**\\n\\n:Number of Instances: 20640\\n\\n:Number of Attributes: 8 numeric, predictive attributes and the target\\n\\n:Attribute Information:\\n - MedInc median income in block group\\n - HouseAge median house age in block group\\n - AveRooms average number of rooms per household\\n - AveBedrms average number of bedrooms per household\\n - Population block group population\\n - AveOccup average number of household members\\n - Latitude block group latitude\\n - Longitude block group longitude\\n\\n:Missing Attribute Values: None\\n\\nThis dataset was obtained from the StatLib repository.\\nhttps://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html\\n\\nThe target variable is the median house value for California districts,\\nexpressed in hundreds of thousands of dollars ($100,000).\\n\\nThis dataset was derived from the 1990 U.S. census, using one row per census\\nblock group. A block group is the smallest geographical unit for which the U.S.\\nCensus Bureau publishes sample data (a block group typically has a population\\nof 600 to 3,000 people).\\n\\nA household is a group of people residing within a home. Since the average\\nnumber of rooms and bedrooms in this dataset are provided per household, these\\ncolumns may take surprisingly large values for block groups with few households\\nand many empty houses, such as vacation resorts.\\n\\nIt can be downloaded/loaded using the\\n:func:`sklearn.datasets.fetch_california_housing` function.\\n\\n.. topic:: References\\n\\n - Pace, R. Kelley and Ronald Barry, Sparse Spatial Autoregressions,\\n Statistics and Probability Letters, 33 (1997) 291-297\\n'}"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.datasets import fetch_california_housing\n",
|
|||
|
"housing = fetch_california_housing()\n",
|
|||
|
"housing"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "e52ae3d2-8d34-4569-aa24-00269f9f51a0",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### show keys of dataset\n",
|
|||
|
"List all keys of the dataset to get an overview of what it contains.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"id": "8911162c-2a9a-43aa-9cad-362dedb43b13",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# show the keys of the California housing dataset (a dict-like object, not JSON)\n",
|
|||
|
"print(housing.keys())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "ac4cc0f9-cc2e-4c90-865a-6731be8964a5",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### import pandas\n",
|
|||
|
"Convert the dataset from its dict-like container into a pandas DataFrame.\n",
|
|||
|
"Note: `housing.data` supplies the rows and `housing.feature_names` supplies the column names."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"id": "19373cf8-4274-4528-a82b-809871ef7008",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>MedInc</th>\n",
|
|||
|
" <th>HouseAge</th>\n",
|
|||
|
" <th>AveRooms</th>\n",
|
|||
|
" <th>AveBedrms</th>\n",
|
|||
|
" <th>Population</th>\n",
|
|||
|
" <th>AveOccup</th>\n",
|
|||
|
" <th>Latitude</th>\n",
|
|||
|
" <th>Longitude</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>8.3252</td>\n",
|
|||
|
" <td>41.0</td>\n",
|
|||
|
" <td>6.984127</td>\n",
|
|||
|
" <td>1.023810</td>\n",
|
|||
|
" <td>322.0</td>\n",
|
|||
|
" <td>2.555556</td>\n",
|
|||
|
" <td>37.88</td>\n",
|
|||
|
" <td>-122.23</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>8.3014</td>\n",
|
|||
|
" <td>21.0</td>\n",
|
|||
|
" <td>6.238137</td>\n",
|
|||
|
" <td>0.971880</td>\n",
|
|||
|
" <td>2401.0</td>\n",
|
|||
|
" <td>2.109842</td>\n",
|
|||
|
" <td>37.86</td>\n",
|
|||
|
" <td>-122.22</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>7.2574</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>8.288136</td>\n",
|
|||
|
" <td>1.073446</td>\n",
|
|||
|
" <td>496.0</td>\n",
|
|||
|
" <td>2.802260</td>\n",
|
|||
|
" <td>37.85</td>\n",
|
|||
|
" <td>-122.24</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>5.6431</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>5.817352</td>\n",
|
|||
|
" <td>1.073059</td>\n",
|
|||
|
" <td>558.0</td>\n",
|
|||
|
" <td>2.547945</td>\n",
|
|||
|
" <td>37.85</td>\n",
|
|||
|
" <td>-122.25</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>3.8462</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>6.281853</td>\n",
|
|||
|
" <td>1.081081</td>\n",
|
|||
|
" <td>565.0</td>\n",
|
|||
|
" <td>2.181467</td>\n",
|
|||
|
" <td>37.85</td>\n",
|
|||
|
" <td>-122.25</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20635</th>\n",
|
|||
|
" <td>1.5603</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>5.045455</td>\n",
|
|||
|
" <td>1.133333</td>\n",
|
|||
|
" <td>845.0</td>\n",
|
|||
|
" <td>2.560606</td>\n",
|
|||
|
" <td>39.48</td>\n",
|
|||
|
" <td>-121.09</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20636</th>\n",
|
|||
|
" <td>2.5568</td>\n",
|
|||
|
" <td>18.0</td>\n",
|
|||
|
" <td>6.114035</td>\n",
|
|||
|
" <td>1.315789</td>\n",
|
|||
|
" <td>356.0</td>\n",
|
|||
|
" <td>3.122807</td>\n",
|
|||
|
" <td>39.49</td>\n",
|
|||
|
" <td>-121.21</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20637</th>\n",
|
|||
|
" <td>1.7000</td>\n",
|
|||
|
" <td>17.0</td>\n",
|
|||
|
" <td>5.205543</td>\n",
|
|||
|
" <td>1.120092</td>\n",
|
|||
|
" <td>1007.0</td>\n",
|
|||
|
" <td>2.325635</td>\n",
|
|||
|
" <td>39.43</td>\n",
|
|||
|
" <td>-121.22</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20638</th>\n",
|
|||
|
" <td>1.8672</td>\n",
|
|||
|
" <td>18.0</td>\n",
|
|||
|
" <td>5.329513</td>\n",
|
|||
|
" <td>1.171920</td>\n",
|
|||
|
" <td>741.0</td>\n",
|
|||
|
" <td>2.123209</td>\n",
|
|||
|
" <td>39.43</td>\n",
|
|||
|
" <td>-121.32</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20639</th>\n",
|
|||
|
" <td>2.3886</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>5.254717</td>\n",
|
|||
|
" <td>1.162264</td>\n",
|
|||
|
" <td>1387.0</td>\n",
|
|||
|
" <td>2.616981</td>\n",
|
|||
|
" <td>39.37</td>\n",
|
|||
|
" <td>-121.24</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>20640 rows × 8 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
|
|||
|
"0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n",
|
|||
|
"1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n",
|
|||
|
"2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n",
|
|||
|
"3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n",
|
|||
|
"4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"20635 1.5603 25.0 5.045455 1.133333 845.0 2.560606 39.48 \n",
|
|||
|
"20636 2.5568 18.0 6.114035 1.315789 356.0 3.122807 39.49 \n",
|
|||
|
"20637 1.7000 17.0 5.205543 1.120092 1007.0 2.325635 39.43 \n",
|
|||
|
"20638 1.8672 18.0 5.329513 1.171920 741.0 2.123209 39.43 \n",
|
|||
|
"20639 2.3886 16.0 5.254717 1.162264 1387.0 2.616981 39.37 \n",
|
|||
|
"\n",
|
|||
|
" Longitude \n",
|
|||
|
"0 -122.23 \n",
|
|||
|
"1 -122.22 \n",
|
|||
|
"2 -122.24 \n",
|
|||
|
"3 -122.25 \n",
|
|||
|
"4 -122.25 \n",
|
|||
|
"... ... \n",
|
|||
|
"20635 -121.09 \n",
|
|||
|
"20636 -121.21 \n",
|
|||
|
"20637 -121.22 \n",
|
|||
|
"20638 -121.32 \n",
|
|||
|
"20639 -121.24 \n",
|
|||
|
"\n",
|
|||
|
"[20640 rows x 8 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd \n",
|
|||
|
"cal_housing = pd.DataFrame(housing.data, columns=housing.feature_names) ## definition of rows and columns\n",
|
|||
|
"cal_housing ## show dataframe"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"id": "b9c0f611-2e5a-4006-881b-dc2a0a397df2",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"(20640, 8)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"cal_housing.shape ## show shape of dataframe, means 20640 rows by 8 columns"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"id": "48477f2c-27ae-458c-a38a-a2d28b3c05bd",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>MedInc</th>\n",
|
|||
|
" <th>HouseAge</th>\n",
|
|||
|
" <th>AveRooms</th>\n",
|
|||
|
" <th>AveBedrms</th>\n",
|
|||
|
" <th>Population</th>\n",
|
|||
|
" <th>AveOccup</th>\n",
|
|||
|
" <th>Latitude</th>\n",
|
|||
|
" <th>Longitude</th>\n",
|
|||
|
" <th>MedHouseVal</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>8.3252</td>\n",
|
|||
|
" <td>41.0</td>\n",
|
|||
|
" <td>6.984127</td>\n",
|
|||
|
" <td>1.023810</td>\n",
|
|||
|
" <td>322.0</td>\n",
|
|||
|
" <td>2.555556</td>\n",
|
|||
|
" <td>37.88</td>\n",
|
|||
|
" <td>-122.23</td>\n",
|
|||
|
" <td>4.526</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>8.3014</td>\n",
|
|||
|
" <td>21.0</td>\n",
|
|||
|
" <td>6.238137</td>\n",
|
|||
|
" <td>0.971880</td>\n",
|
|||
|
" <td>2401.0</td>\n",
|
|||
|
" <td>2.109842</td>\n",
|
|||
|
" <td>37.86</td>\n",
|
|||
|
" <td>-122.22</td>\n",
|
|||
|
" <td>3.585</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>7.2574</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>8.288136</td>\n",
|
|||
|
" <td>1.073446</td>\n",
|
|||
|
" <td>496.0</td>\n",
|
|||
|
" <td>2.802260</td>\n",
|
|||
|
" <td>37.85</td>\n",
|
|||
|
" <td>-122.24</td>\n",
|
|||
|
" <td>3.521</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>5.6431</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>5.817352</td>\n",
|
|||
|
" <td>1.073059</td>\n",
|
|||
|
" <td>558.0</td>\n",
|
|||
|
" <td>2.547945</td>\n",
|
|||
|
" <td>37.85</td>\n",
|
|||
|
" <td>-122.25</td>\n",
|
|||
|
" <td>3.413</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>3.8462</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>6.281853</td>\n",
|
|||
|
" <td>1.081081</td>\n",
|
|||
|
" <td>565.0</td>\n",
|
|||
|
" <td>2.181467</td>\n",
|
|||
|
" <td>37.85</td>\n",
|
|||
|
" <td>-122.25</td>\n",
|
|||
|
" <td>3.422</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20635</th>\n",
|
|||
|
" <td>1.5603</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>5.045455</td>\n",
|
|||
|
" <td>1.133333</td>\n",
|
|||
|
" <td>845.0</td>\n",
|
|||
|
" <td>2.560606</td>\n",
|
|||
|
" <td>39.48</td>\n",
|
|||
|
" <td>-121.09</td>\n",
|
|||
|
" <td>0.781</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20636</th>\n",
|
|||
|
" <td>2.5568</td>\n",
|
|||
|
" <td>18.0</td>\n",
|
|||
|
" <td>6.114035</td>\n",
|
|||
|
" <td>1.315789</td>\n",
|
|||
|
" <td>356.0</td>\n",
|
|||
|
" <td>3.122807</td>\n",
|
|||
|
" <td>39.49</td>\n",
|
|||
|
" <td>-121.21</td>\n",
|
|||
|
" <td>0.771</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20637</th>\n",
|
|||
|
" <td>1.7000</td>\n",
|
|||
|
" <td>17.0</td>\n",
|
|||
|
" <td>5.205543</td>\n",
|
|||
|
" <td>1.120092</td>\n",
|
|||
|
" <td>1007.0</td>\n",
|
|||
|
" <td>2.325635</td>\n",
|
|||
|
" <td>39.43</td>\n",
|
|||
|
" <td>-121.22</td>\n",
|
|||
|
" <td>0.923</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20638</th>\n",
|
|||
|
" <td>1.8672</td>\n",
|
|||
|
" <td>18.0</td>\n",
|
|||
|
" <td>5.329513</td>\n",
|
|||
|
" <td>1.171920</td>\n",
|
|||
|
" <td>741.0</td>\n",
|
|||
|
" <td>2.123209</td>\n",
|
|||
|
" <td>39.43</td>\n",
|
|||
|
" <td>-121.32</td>\n",
|
|||
|
" <td>0.847</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>20639</th>\n",
|
|||
|
" <td>2.3886</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>5.254717</td>\n",
|
|||
|
" <td>1.162264</td>\n",
|
|||
|
" <td>1387.0</td>\n",
|
|||
|
" <td>2.616981</td>\n",
|
|||
|
" <td>39.37</td>\n",
|
|||
|
" <td>-121.24</td>\n",
|
|||
|
" <td>0.894</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>20640 rows × 9 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n",
|
|||
|
"0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n",
|
|||
|
"1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n",
|
|||
|
"2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n",
|
|||
|
"3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n",
|
|||
|
"4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"20635 1.5603 25.0 5.045455 1.133333 845.0 2.560606 39.48 \n",
|
|||
|
"20636 2.5568 18.0 6.114035 1.315789 356.0 3.122807 39.49 \n",
|
|||
|
"20637 1.7000 17.0 5.205543 1.120092 1007.0 2.325635 39.43 \n",
|
|||
|
"20638 1.8672 18.0 5.329513 1.171920 741.0 2.123209 39.43 \n",
|
|||
|
"20639 2.3886 16.0 5.254717 1.162264 1387.0 2.616981 39.37 \n",
|
|||
|
"\n",
|
|||
|
" Longitude MedHouseVal \n",
|
|||
|
"0 -122.23 4.526 \n",
|
|||
|
"1 -122.22 3.585 \n",
|
|||
|
"2 -122.24 3.521 \n",
|
|||
|
"3 -122.25 3.413 \n",
|
|||
|
"4 -122.25 3.422 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"20635 -121.09 0.781 \n",
|
|||
|
"20636 -121.21 0.771 \n",
|
|||
|
"20637 -121.22 0.923 \n",
|
|||
|
"20638 -121.32 0.847 \n",
|
|||
|
"20639 -121.24 0.894 \n",
|
|||
|
"\n",
|
|||
|
"[20640 rows x 9 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"cal_housing['MedHouseVal'] = housing.target # add the target column MedHouseVal, stored under the dataset's 'target' key\n",
|
|||
|
"cal_housing"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"id": "62a4c8c5-f0df-4f48-b473-18dfe7492c0d",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"MedInc 0\n",
|
|||
|
"HouseAge 0\n",
|
|||
|
"AveRooms 0\n",
|
|||
|
"AveBedrms 0\n",
|
|||
|
"Population 0\n",
|
|||
|
"AveOccup 0\n",
|
|||
|
"Latitude 0\n",
|
|||
|
"Longitude 0\n",
|
|||
|
"MedHouseVal 0\n",
|
|||
|
"dtype: int64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"cal_housing.isnull().sum() ## look for null values inside dataframe"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"id": "e812be9e-af15-4f7c-b309-9c0cce3e98c0",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABSIAAAZECAYAAAD2bX79AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdfVxU553///eoOIiBiWBgJCFqs2hMMKmrETHJaqtAjIRk3Q1NTIkmrnFXqyXqGq1NM7YGIt2oXWhudG00oiFtU/LLjSFgmpC6aEQSW7XWphtjY8pIbhBUcBjx/P7wy6kj98gww/B6Ph48ZK7zOedcnznAgY/XuS6LYRiGAAAAAAAAAMCL+vi6AwAAAAAAAAACH4VIAAAAAAAAAF5HIRIAAAAAAACA11GIBAAAAAAAAOB1FCIBAAAAAAAAeB2FSAAAAAAAAABeRyESAAAAAAAAgNdRiAQAAAAAAADgdRQiAQAAAAAAAHgdhUigBZs3b5bFYpHFYtF7773XZLthGPqHf/gHWSwWTZ48ucvOO2zYMM2ePdt8/d5778lisejXv/51l50DAICLNd7z9u3b1+z2lJQUDRs2rHs71QVee+01WSwWRUREyOVy+bo7AIAe6r//+79lsVgUFxfn9XMNGzbM/DvUYrFo4MCB+sd//Efl5ubKMAyvnx/wNgqRQBtCQ0O1adOmJu0lJSX6v//7P4WGhvqgVwAAoC2N9++vv/5ar776qm87AwDosX7xi19Ikg4dOqQPPvjA6+e79dZbtXv3bu3evVtbt25VSEiIFi5cqKysLK+fG/A2CpFAG77zne/olVdeUU1NjUf7pk2blJCQoGuvvdZHPQMAAC1xOp3asWOHvv3tbys4OLjZ/1QEAKAt+/bt0+9//3tNnz5dkrrlfnLllVdqwoQJmjBhgmbMmKEdO3bIZrPp+eef9/q5AW+jEAm04f7775ckvfTSS2ZbdXW1XnnlFT388MNN4uvr67V69Wpdf/31slqtuuqqq/TQQw/piy++8Ihzu91atmyZ7Ha7QkJCdNttt2nv3r3t6pPD4ZDFYtGhQ4d0//33y2azKSoqSg8//LCqq6s9Ys+fP6+cnBx985vf1IABA8yb2muvvdbRtwIAAEnS2bNntWLFCg0fPlz9+/fX1VdfrQULFujkyZMecRaLRQ6Ho8n+l05DUltbq6VLl2r48OEKDg5WeHi4xo0b53HvlS78MZiamqrw8HAFBwdrzJgx+uUvf9lsH7ds2aJz587p0Ucf1YwZM/TOO+/o2LFjTeJOnjypOXPmKDw8XFdccYWmT5+uTz75pNm+f/zxx5o5c6YiIyNltVo1atQo/fznP2/XewYA6JkaC49PPfWUJk6cqPz8fNXW1srtdisyMlLp6elN9jl58qQGDBigxYsXm201NTXmva7x3pmRkaEzZ8602YewsDCNGDFCJ06c8Gj/+uuvNX/+fF199dXq37+/vvGNb2jlypVNpiNp73172LBhSklJ0RtvvKExY8ZowIABGjVqlN544w1JF6ZyGTVqlAYOHKjx48c3mdLlk08+0X333afo6GhZrVZFRUVpypQp2r9/f5s5ovfo5+sOAP4uLCxM//qv/6pf/OIXmjdvnqQLRck+ffroO9/5jtavX2/Gnj9/Xnfffbd+97vfadmyZZo4caKOHTumJ554QpMnT9a+ffs0YMAASdLcuXP14osvaunSpUpMTNTBgwc1Y8YMnTp1qt19+5d/+Rd95zvf0Zw5c3TgwAGtWLFC0t8fHZCk2bNnKy8vT3PmzNGPf/xj9e/fXx9++KE+/fTTy39zAAABpaGhQefOnWvSfvGcVIZh6J577tE777yjFStW6Pbbb9cf/vAHPfHEE+ZjZFartUPnXbx4sbZu3arVq1drzJgxOnPmjA4ePKivvvrKjHn33Xd1xx13KD4+Xs8995xsNpvy8/P1ne98R7W1tR6FTenCvXDIkCGaNm2aBgwYoO3bt2
vz5s164oknzJjz58/rrrvu0r59++RwOPSP//iP2r17t+64444mffzjH/+oiRMn6tprr9XTTz8tu92ut99+W4sWLdKXX37pcVwAQGCoq6vTSy+9pFtuuUVxcXF6+OGH9W//9m/61a9+pVmzZum73/2unnvuOf385z9XWFiYud9LL72ks2fP6qGHHpJ04T/cJk2apOPHj+sHP/iBbrrpJh06dEg/+tGPdODAAe3cuVMWi6XFfpw7d06fffaZRowYYbadPXtW3/rWt/R///d/WrVqlW666Sb97ne/U1ZWlvbv368333xTUsfv27///e+1YsUKrVy5UjabTatWrdKMGTO0YsUKvfPOO8rMzJTFYtFjjz2mlJQUHT161Pwb984771RDQ4Oys7N17bXX6ssvv1RpaWmTgid6OQNAs1544QVDklFWVma8++67hiTj4MGDhmEYxi233GLMnj3bMAzDuPHGG41JkyYZhmEYL730kiHJeOWVVzyOVVZWZkgynnnmGcMwDOPw4cOGJOPRRx/1iNu2bZshyZg1a5bZ1njuX/3qV2bbE088YUgysrOzPfafP3++ERwcbJw/f94wDMN4//33DUnGypUrL/8NAQAErMZ7XmsfQ4cONQzDMAoLC5u9B7388suGJGPDhg1mmyTjiSeeaHK+oUOHetzr4uLijHvuuafVPl5//fXGmDFjDLfb7dGekpJiDBkyxGhoaDDbGu9/y5cvNwzDMM6fP28MHz7cGDp0qHmPNAzDePPNNw1JxrPPPutxzKysrCZ9T05ONq655hqjurraI/Z73/ueERwcbHz99det9h8A0PO8+OKLhiTjueeeMwzDME6dOmVcccUVxu23324YhmH84Q9/aHLvMwzDGD9+vDF27FjzdVZWltGnTx+jrKzMI+7Xv/61IcnYsWOH2TZ06FDjzjvvNNxut+F2u41jx44Zc+fONYKCgow33njDjHvuuecMScYvf/lLj2OuWbPGkGQUFRUZhtGx+/bQoUONAQMGGMePHzfb9u/fb0gyhgwZYpw5c8Zsf/XVVw1JxmuvvWYYhmF8+eWXhiRj/fr1bb2t6OV4NBtoh0mTJum6667TL37xCx04cEBlZWXNPpb9xhtv6Morr9Rdd92lc+fOmR/f/OY3ZbfbzdW33333XUnSAw884LF/Wlqa+vVr/0Dl1NRUj9c33XSTzp49q8rKSknSW2+9JUlasGBBu48JAOi9XnzxRZWVlTX5uO2228yY3/72t5LUZATivffeq4EDB+qdd97p8HnHjx+vt956S8uXL9d7772nuro6j+1/+ctf9Kc//cm8b158j73zzjtVUVGhI0eOmPGNj9E13qstFotmz56tY8eOefSvpKRE0oX778Uap2VpdPbsWb3zzjv653/+Z4WEhDQ5/9mzZ7Vnz54O5w0A8G+bNm3SgAEDdN9990mSrrjiCt1777363e9+p48//lijR4/W2LFj9cILL5j7HD58WHv37vX4e/GNN95QXFycvvnNb3rcQ5KTk2WxWMy/Exvt2LFDQUFBCgoK0tChQ7Vx40bl5OSY81RKF+7HAwcO1L/+67967Nt4f26833X0vv3Nb35TV199tfl61KhRkqTJkycrJCSkSXvjtCfh4eG67rrr9NOf/lRr167VRx99pPPnz7fy7qK3ohAJtIPFYtFDDz2kvLw8PffccxoxYoRuv/32JnEnTpzQyZMn1b9/f/PG0fjhdDr15ZdfSpL5qJndbvfYv1+/foqIiGh3vy6NbRxS3/gH3BdffKG+ffs2OQ8AAM0ZNWqUxo0b1+TDZrOZMV999ZX69eunq666ymNfi8Uiu93u8Th1e/33f/+3HnvsMb366qv61re+pfDwcN1zzz36+OOPJcmcE2vp0qVN7q/z58+XJPMee+rUKf3qV7/S+PHjddVVV+nkyZM6efKk/vmf/1kWi8VjkYHGXMLDwz36ExUV5fH6q6++0rlz55STk9Pk/HfeeafH+QEAgeEvf/
mL3n//fU2fPl2GYZj3k8bCX+N0WA8//LB2796tP/3pT5KkF154QVar1eM/tU6cOKE//OEPTe4hoaGhMgyjyT3kttt
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x2000 with 9 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt ## visualization library\n",
|
|||
|
"\n",
|
|||
|
"cal_housing.hist(bins=120, figsize=(16,20)) \n",
|
|||
|
"plt.subplots_adjust(hspace=0.7, wspace=0.4)\n",
|
|||
|
"#plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"id": "abb96d05-6c16-4f31-a90d-7032e346457e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"0 8.3252\n",
|
|||
|
"1 8.3014\n",
|
|||
|
"2 7.2574\n",
|
|||
|
"3 5.6431\n",
|
|||
|
"4 3.8462\n",
|
|||
|
" ... \n",
|
|||
|
"20635 1.5603\n",
|
|||
|
"20636 2.5568\n",
|
|||
|
"20637 1.7000\n",
|
|||
|
"20638 1.8672\n",
|
|||
|
"20639 2.3886\n",
|
|||
|
"Name: MedInc, Length: 20640, dtype: float64"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"cal_housing[cal_housing.columns[0]] ## show the data of the first column, in this case MedInc (median income)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"id": "5bbceaef-a9af-4573-bcd3-a0cc8d3e472b",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"range(0, 9)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(range(9)) ## range(9) includes its start (0) and excludes its end (9) --> yields 0..8"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"id": "6464dc1a-b433-4606-801f-e838cfb5488c",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"(('John', 1), ('Charles', 2), ('Mike', 3))\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"## How does zip() work? It pairs items by position and stops at the shortest input, so the 4 in b is dropped.\n",
|
|||
|
"a = (\"John\", \"Charles\", \"Mike\")\n",
|
|||
|
"b = (1, 2, 3, 4)\n",
|
|||
|
"x = zip(a, b)\n",
|
|||
|
"print(tuple(x))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"id": "49540a5c-f083-4c03-b6bb-9af70d02800f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABSwAAAZDCAYAAAD1oX72AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzde3zU9Z33/dccksmBnBMyCUQ5GBAMIIIiaAuWg1WRdt2Wu8W69lrr0qW3NhVry2XbjV4trLQgK+y21csVlFK6e3fpardVwVZaBJWDHA0E5AwJgZDzYTKH3/3HZEZCzmFmfpPk/Xw85lEy883MZ9TySz7zOVgMwzAQERERERERERERiQJWswMQERERERERERERCVDCUkRERERERERERKKGEpYiIiIiIiIiIiISNZSwFBERERERERERkaihhKWIiIiIiIiIiIhEDSUsRUREREREREREJGooYSkiIiIiIiIiIiJRQwlLERERERERERERiRp2swPoK3w+H+fPnycpKQmLxWJ2OCIi0gHDMKitrSU3NxerVZ/LtUfXNBGRvkPXta7puiYi0nd097qmhGU3nT9/nry8PLPDEBGRbjpz5gxDhw41O4yopGuaiEjfo+tax3RdExHpe7q6rilh2U1JSUmA/x9ocnKyydGIiEhHampqyMvLC/69LW3pmiYi0nfoutY1XddERPqO7l7XlLDspkBrQXJysi6CIiJ9gFrCOqZrmohI36PrWsd0XRMR6Xu6uq5pCIqIiIiIiIiIiIhEDSUsRUREREREREREJGooYSkiIiIiIiIiIiJRQwlLERERERERERERiRqmJiz/8pe/cP/995Obm4vFYuF3v/tdq8cNw6CoqIjc3Fzi4+OZMWMGhw4danXG5XLx2GOPkZmZSWJiIvPmzePs2bOtzlRWVvLQQw+RkpJCSkoKDz30EFVVVWF+dyIiIiIiIiIiItJTpiYs6+vrmTBhAmvWrGn38eXLl7Ny5UrWrFnDzp07cTqdzJ49m9ra2uCZwsJCNm3axMaNG9m2bRt1dXXMnTsXr9cbPLNgwQL27t3Lm2++yZtvvsnevXt56KGHwv7+REREREREREREpGfsZr74Pffcwz333NPuY4ZhsGrVKp5++mkeeOABANatW0d2djYbNmxg4cKFVFdX8/LLL/Paa68xa9YsANavX09eXh5btmzh7rvvpri4mDfffJP333+fKVOmAPDSSy8xdepUjhw5wujRoyPzZkVERERERERERKRLUTvD8sSJE5SVlTFnzpzgfQ6Hg+nTp7N9+3YAdu/ejdvtbnUmNzeXgoKC4JkdO3aQkpISTFYC3H777aSkpATPtMflclFTU9PqJiIiIiIiIiIiIuEVtQnLsrIyALKzs1vdn52dHXysrKyM2NhY0tLSOj0zePDgNs8/ePDg4Jn2LFu2LDjzMiUlhby8vGt6PyIiIiIiIiIiItK1qE1YBlgsllZfG4bR5r6rXX2mvfNdPc+SJUuorq4O3s6cOdPDyEVERERERERERKSnojZh6XQ6AdpUQZaXlwerLp1OJ83NzVRWVnZ65sKFC22e/+LFi22qN6/kcDhITk5udRMREREREREREZHwitqE5fDhw3E6nWzevDl4X3NzM1u3bmXatGkATJo0iZiYmFZnSktLOXjwYPDM1KlTqa6u5sMPPwye+eCDD6iurg6eERERERERERERkehg6pbwuro6jh07Fvz6xIkT7N27l/T0dK677joKCwtZunQp+fn55Ofns3TpUhISEliwYAEAKSkpPPLIIyxevJiMjAzS09N58sknGTduXHBr+JgxY/j85z/Po48+yi9/+UsA/uEf/oG5c+dqQ7iIiIiIiIiIiEiUMTVhuWvXLu66667g10888QQADz/8MGvXruWpp56isbGRRYsWUVlZyZQpU3j77bdJSkoKfs/zzz+P3W5n/vz5NDY2MnPmTNauXYvNZgue+dWvfsXjjz8e3C
Y+b9481qxZE6F3KSIiIiIiIiIiIt1lMQzDMDuIvqCmpoaUlBSqq6s1z1JEJIrp7+uu6Z+RiEjfob+zu6Z/RiIifUd3/86O2hmWIiIiIiIiIiIiMvAoYSkiIiIiIiIiIiJRw9QZltL/uN1uiouLW903ZswYYmJiTIpIRERERERERET6EiUsJaSKi4vZvOIFhmUNBuDkxXJY/Djjx483OTIRERERkdCbOG48pWWlnZ7Jcebw0YH9EYpIwsntduP1enE4HFgsFrPDERHpt5SwlJAbljWYUblDzA5DRERERCTsSstK2bykqNMzs5d1/rj0HV6vl49WrOHW7xVit+vXaRGRcNEMSxEREREREZFusln1a7SISLjpb1oRERERERERERGJGkpYioiIiIiIiHSDx+PB5/OZHYaISL+nhKWIiIiIiIiIiIhEDU0JFhERERGJEG2UFum7DMPA4/EAhtmhiIj0e0pYioiIiIhEiDZKi/RdXq+X3Sv/lfiYGLNDERHp99QSLiIiIiIiItIN2hAuIhIZ+ttWREREREREREREooYSliIiIiIiIiIiIhI1lLAUERERERERERGRqKGEpYiIiIiIiIiIiEQNbQkXERERiWITx42ntKy00zM5zhw+OrA/QhGJiIiIiISXEpYiIiIiUay0rJTNS4o6PTN7WeePi4iIiIj0JWoJFxERERERERERkaihhKWIiIiIiIj0mMfj4Qc/+AHDhw8nPj6eESNG8Oyzz+Lz+YJnDMOgqKiI3Nxc4uPjmTFjBocOHWr1PC6Xi8cee4zMzEwSExOZN28eZ8+ejfTbERGRKKKEpXTJ7Xazf//+Vje32212WCIiIiIiYqLnnnuOX/ziF6xZs4bi4mKWL1/OT3/6U1avXh08s3z5clauXMmaNWvYuXMnTqeT2bNnU1tbGzxTWFjIpk2b2LhxI9u2baOuro65c+fi9XrNeFsiIhIFNMNSulRcXMzmFS8wLGswACcvlsPixxk/frzJkYmIiIiIiFl27NjBF77wBe677z4Ahg0bxq9//Wt27doF+KsrV61axdNPP80DDzwAwLp168jOzmbDhg0sXLiQ6upqXn75ZV577TVmzZoFwPr168nLy2PLli3cfffd5rw5ERExlSospVuGZQ1mVO4QRuUOCSYuRURERERk4Lrzzjt55513KCkpAWDfvn1s27aNe++9F4ATJ05QVlbGnDlzgt/jcDiYPn0627dvB2D37t243e5WZ3JzcykoKAieuZrL5aKmpqbVTURE+hdVWIqIiIiIiEiPfe9736O6upobb7wRm82G1+vlJz/5CV/96lcBKCsrAyA7O7vV92VnZ3Pq1KngmdjYWNLS0tqcCXz/1ZYtW8YzzzwT6rcjIiJRRBWWIiIiIiIi0mO/+c1vWL9+PRs2bGDPnj2sW7eOn/3sZ6xbt67VOYvF0uprwzDa3He1zs4sWbKE6urq4O3MmTPX9kZERCTqqMJSREREREREeuy73/0u3//+9/nKV74CwLhx4zh16hTLli3j4Ycfxul0Av4qypycnOD3lZeXB6sunU4nzc3NVFZWtqqyLC8vZ9q0ae2+rsPhwOFwhOttiYhIFFCFpYiIiIiIiPRYQ0MDVmvrXyltNhs+nw+A4cOH43Q62bx5c/Dx5uZmtm7dGkxGTpo0iZiYmFZnSktLOXjwYIcJSxER6f9UYSkiIiIiIiI9dv/99/OTn/yE6667jptuuomPPvqIlStX8vd///eAvxW8sLCQpUuXkp+fT35+PkuXLiUhIYEFCxYAkJKSwiOPPMLixYvJyMggPT2dJ598knHjxgW3houIyMCjhKWIiIiIiIj02OrVq/nhD3/IokWLKC8vJzc3l4ULF/KjH/0oeOapp56isbGRRYsWUVlZyZQpU3j77bdJSkoKnnn++eex2+3Mnz+fxsZGZs6cydq1a7HZbGa8rU4ZPh9EYVwiIv2NEpYiIiIiIiLSY0lJSaxatYpVq1Z1eMZisVBUVE
RRUVGHZ+Li4li9ejWrV68OfZAhdGnvAcrX/wcpkyaYHYqISL+nGZYiIiIiIiIiXajcfxDD46Hp/AWzQxER6feUsBQ
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x2000 with 9 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import seaborn as sns ## library for more polished statistical visualizations\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"fig, axes = plt.subplots(ncols=3, nrows=3, figsize=(16,20)) ## define canvas\n",
|
|||
|
"# print(axes.shape)\n",
|
|||
|
"for i, ax in zip(range(9), axes.flat):\n",
|
|||
|
" sns.histplot(data=cal_housing[cal_housing.columns[i]], kde=True, ax=ax, color=\"#a71d2a\")\n",
|
|||
|
" "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"id": "14afd85d-241b-4bf4-99e6-35dfab342492",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>AveRooms</th>\n",
|
|||
|
" <th>AveBedrms</th>\n",
|
|||
|
" <th>AveOccup</th>\n",
|
|||
|
" <th>Population</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>20640.000000</td>\n",
|
|||
|
" <td>20640.000000</td>\n",
|
|||
|
" <td>20640.000000</td>\n",
|
|||
|
" <td>20640.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>5.429000</td>\n",
|
|||
|
" <td>1.096675</td>\n",
|
|||
|
" <td>3.070655</td>\n",
|
|||
|
" <td>1425.476744</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>2.474173</td>\n",
|
|||
|
" <td>0.473911</td>\n",
|
|||
|
" <td>10.386050</td>\n",
|
|||
|
" <td>1132.462122</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>0.846154</td>\n",
|
|||
|
" <td>0.333333</td>\n",
|
|||
|
" <td>0.692308</td>\n",
|
|||
|
" <td>3.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>4.440716</td>\n",
|
|||
|
" <td>1.006079</td>\n",
|
|||
|
" <td>2.429741</td>\n",
|
|||
|
" <td>787.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>5.229129</td>\n",
|
|||
|
" <td>1.048780</td>\n",
|
|||
|
" <td>2.818116</td>\n",
|
|||
|
" <td>1166.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>6.052381</td>\n",
|
|||
|
" <td>1.099526</td>\n",
|
|||
|
" <td>3.282261</td>\n",
|
|||
|
" <td>1725.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>141.909091</td>\n",
|
|||
|
" <td>34.066667</td>\n",
|
|||
|
" <td>1243.333333</td>\n",
|
|||
|
" <td>35682.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" AveRooms AveBedrms AveOccup Population\n",
|
|||
|
"count 20640.000000 20640.000000 20640.000000 20640.000000\n",
|
|||
|
"mean 5.429000 1.096675 3.070655 1425.476744\n",
|
|||
|
"std 2.474173 0.473911 10.386050 1132.462122\n",
|
|||
|
"min 0.846154 0.333333 0.692308 3.000000\n",
|
|||
|
"25% 4.440716 1.006079 2.429741 787.000000\n",
|
|||
|
"50% 5.229129 1.048780 2.818116 1166.000000\n",
|
|||
|
"75% 6.052381 1.099526 3.282261 1725.000000\n",
|
|||
|
"max 141.909091 34.066667 1243.333333 35682.000000"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"features_of_interest = [\"AveRooms\", \"AveBedrms\", \"AveOccup\", \"Population\"]\n",
|
|||
|
"cal_housing[features_of_interest].describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "8e67ec4b-0f55-4c86-a777-95c0f503362a",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"further information about [color_palettes](https://seaborn.pydata.org/tutorial/color_palettes.html)!"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"id": "255e7d1a-a9cc-4da4-b081-3ac787336721",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArsAAAHWCAYAAAB34UGbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d5xkRbn//66TOk/OYSdszgQXxATrIsiiEuSqBIl68Qom9KqoSBBBQL2oV1REFhXMIqJ+f1yRZACRDBvZNLszOzmn7j6pfn+cmd7p7Znd2bxgvV+v5UXXqXOqzune7U8/9dTnEVJKiUKhUCgUCoVC8TpEO9wTUCgUCoVCoVAoDhZK7CoUCoVCoVAoXrcosatQKBQKhUKheN2ixK5CoVAoFAqF4nWLErsKhUKhUCgUitctSuwqFAqFQqFQKF63KLGrUCgUCoVCoXjdosSuQqFQKBQKheJ1ixK7CoVCoVAoFIrXLUrsKo447rnnHoQQCCF4/PHHc45LKZk1axZCCE466aQDOnZ9fT0XX3xx5vXjjz8+5TwONhdffDHxePyQj/taRQjBddddd7inkWH8c9zU1HS4p3JAmOzvwsUXX0x9ff1hm9N0+c53vsOsWbOwLAshBP39/Yd7SgqF4hCixK7iiCWRSPCjH/0op/2JJ55g8+bNJBKJgz6HY445hqeeeopjjjnmoI+lULzWuOaaa/jd7353uKexW1588UU+/vGPs3z5ch599FGeeuqpQ/Jvh0KhOHIwDvcEFIqpeP/73899993Hd7/7XfLy8jLtP/rRjzjhhBMYHBw86HPIy8vjjW9840EfR6F4LTJz5szDPYU9smbNGgA+/OEPc9xxxx3m2SgUisOBiuwqjljOPfdcAH7+859n2gYGBvjtb3/LpZdeOuk5tm1z4403Mm/ePEKhEKWlpVxyySV0dXVl9XMch89+9rNUVFQQjUZ5y1vewr/+9a+c6022dPvss8/ygQ98gPr6eiKRCPX19Zx77rls27Yt69zxZezHHnuM//qv/6KkpITi4mLOPvtsWltbp/0cNm3axMqVK4nH49TW1vLpT3+adDqd1ae3t5ePfvSjVFdXY1kWjY2NfPGLX8zq19TUhBCCe+65J2eMXVMAurq6+M///E9qa2szz/HNb34zf/nLX7LO+8tf/sKKFSvIy8sjGo3y5je/mUceeWS399PV1YVlWVxzzTU5x9avX48Qgm9/+9uZvh/96EdZsGAB8XicsrIy3v72t/O3v/1tT4+N6667DiFETvtU6QW//OUvOeGEE4jFYsTjcU499VReeOGFPY4D8M9//pM3v/nNhMNhqqqquPrqq3EcZ9K+0xlnPIVlzZo1rFixglgsRmlpKVdeeSWjo6NZfaWU3HHHHRx11FFEIhEKCws555xz2LJlS1a/k046iUWLFvHMM8/w1re+lWg0SmNjI1/72tfwfT+r7/r163nnO99JNBqlpKSEj3zkIwwNDeXcy2RpDEIIrrzySn76058yf/58otEoS5cu5Y9//GPO+b///e9ZsmQJoVCIxsZGvvWtb035vk3G3XffzdKlSwmHwxQVFXHWWWexbt26rHu+4IILADj++OMRQmSlKSkUin8PlNhVHLHk5eVxzjnncPfdd2fafv7zn6NpGu9///tz+vu+zxlnnMHXvvY1zjvvPP70pz/xta99jYcffpiTTjqJZDKZ6fvhD3+Yr3/961x44YX8/ve/573vfS9nn302fX19e5xXU1MTc+fO5fbbb+f//u//uOWWW2hra2PZsmV0d3fn9P/Qhz6EaZr87Gc/49Zbb+Xxxx/PfAHvCcdxeM973sOKFSv4/e9/z6WXXsr//M//cMstt2T6pFIpli9fzk9+8hOuuuoq/vSnP3HBBRdw6623cvbZZ09rnF354Ac/yAMPPMCXv/xl/vznP3PXXXdx8skn09PTk+lz7733csopp5CXl8ePf/xjfvWrX1FUVMSpp566W8
FbWlrKu971Ln784x/niKxVq1ZhWRbnn38+EIh4gGuvvZY//elPrFq1isbGRk466aQDmkd90003ce6557JgwQJ+9atf8dOf/pShoSHe+ta3snbt2t2eu3btWlasWEF/fz/33HMP3//+93nhhRe48cYb92scx3FYuXIlK1as4IEHHuDKK6/kBz/4Qc5n//LLL+eTn/wkJ598Mg888AB33HEHa9as4U1vehMdHR1Zfdvb2zn//PO54IILePDBBznttNO4+uqruffeezN9Ojo6OPHEE1m9ejV33HEHP/3pTxkeHubKK6+c9vP805/+xP/+7/9yww038Nvf/jYjRCcK8Iceeoizzz6b4uJifvnLX3Lrrbfy85//nB//+MfTGuPmm2/msssuY+HChdx///1861vf4uWXX+aEE05g48aNANxxxx186UtfAoLP1lNPPTXpjyyFQvE6RyoURxirVq2SgHzmmWfkY489JgG5evVqKaWUy5YtkxdffLGUUsqFCxfKE088MXPez3/+cwnI3/72t1nXe+aZZyQg77jjDimllOvWrZOA/NSnPpXV77777pOAvOiiizJt4+M/9thjU87XdV05PDwsY7GY/Na3vpVzHx/96Eez+t96660SkG1tbbt9DhdddJEE5K9+9aus9pUrV8q5c+dmXn//+9+ftN8tt9wiAfnnP/9ZSinl1q1bJSBXrVqVMxYgr7322szreDwuP/nJT045t5GREVlUVCTf/e53Z7V7nieXLl0qjzvuuN3e24MPPpg1NymD51hVVSXf+973Tnme67rScRy5YsUKedZZZ+32Hq699lo52T9x4+/L1q1bpZRSbt++XRqGIT/2sY9l9RsaGpIVFRXyfe97327v5f3vf7+MRCKyvb09a57z5s3b53HG3/uJnycppfzqV78qAfn3v/9dSinlU089JQH5jW98I6tfc3OzjEQi8rOf/Wym7cQTT5SAfPrpp7P6LliwQJ566qmZ15/73OekEEK++OKLWf3e8Y535PxduOiii2RdXV1WP0CWl5fLwcHBTFt7e7vUNE3efPPNmbZly5bJ2tpamU6ns55FcXHxpO/bRPr6+mQkEpErV67Mat++fbsMhULyvPPOy7RN/PdEoVD8e6Iiu4ojmhNPPJGZM2dy991388orr/DMM89MmcLwxz/+kYKCAt797nfjum7mz1FHHUVFRUUmEvjYY48BZKKH47zvfe/DMPacxj48PMznPvc5Zs2ahWEYGIZBPB5nZGQkawl1nPe85z1Zr5csWQKQk/YwGUII3v3ud+ecP/HcRx99lFgsxjnnnJPVb3y5dk9pBZNx3HHHcc8993DjjTfyz3/+M2dJ/sknn6S3t5eLLroo61n7vs873/lOnnnmGUZGRqa8/mmnnUZFRQWrVq3KtP3f//0fra2tOe/v97//fY455hjC4TCGYWCaJo888sikz3pf+L//+z9c1+XCCy/MupdwOMyJJ564xwjyY489xooVKygvL8+06bqeE4Hdl3F2/Yyed955mTEh+MwLIbjggguyrllRUcHSpUtzrllRUZGTt7rr5+mxxx5j4cKFLF26dNKxp8Py5cuzNoGVl5dTVlaWGWdkZIRnn32WM888E8uyMv3i8XjO530ynnrqKZLJZE5KQm1tLW9/+9v36TOvUChev6gNaoojGiEEl1xyCd/+9rdJpVLMmTOHt771rZP27ejooL+/P+vLcyLjKQbjS/EVFRVZxw3DoLi4eI9zOu+883jkkUe45pprWLZsGXl5eQghWLlyZVaqxDi7XjMUCgFM2ndXotEo4XA45/xUKpV53dPTQ0VFRU6eY1lZGYZhZKUeTJdf/vKX3Hjjjdx1111cc801xONxzjrrLG699VYqKioyy+O7CuyJ9Pb2EovFJj1mGAYf/OAH+c53vkN/fz8FBQXcc889VFZWcuqpp2b6ffOb3+TTn/40H/nIR/jKV75CSUkJuq5zzTXXHDCxO34vy5Ytm/S4pu0+Jj
D+/Hdl17a9HWeyz+P4Ncff046ODqSUWUJ7Io2NjVmvJ/t8h0KhrM9iT08PDQ0Ne7yf3bGncfr6+qac91T3MpHx+6+
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" data=cal_housing,\n",
|
|||
|
" x=\"Longitude\",\n",
|
|||
|
" y=\"Latitude\",\n",
|
|||
|
" size=\"MedHouseVal\",\n",
|
|||
|
" hue=\"MedHouseVal\",\n",
|
|||
|
" palette=\"flare\", ## see above color palette\n",
|
|||
|
" alpha=0.5,\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"plt.legend(title=\"MedHouseVal\", bbox_to_anchor=(1.05, 0.95), loc=\"upper left\")\n",
|
|||
|
"plt.title(\"Median house value depending of\\n their spatial location\")\n",
|
|||
|
"plt.savefig('cal_housing_scatter.png', bbox_inches='tight')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"id": "eaff2b02-a3c3-43a0-aa07-4665f31356f6",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"\n",
|
|||
|
"rng = np.random.RandomState(0)\n",
|
|||
|
"indices = rng.choice(\n",
|
|||
|
" np.arange(cal_housing.shape[0]), size=500, replace=False\n",
|
|||
|
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"id": "107092f6-00e8-493c-bd0b-5c36d575f437",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArsAAAHWCAYAAAB34UGbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAADe+UlEQVR4nOzdeXxU1fn48c9dZs1k3xNCEkD2TRQVN0BcKtRarV+tS0HUfrVqrbWLonWtFbf2q7VVq1XQatXWVqu1P6tV0KqogKAiiwqBLGTfJ5n13vP7IzAyJIEQExLC83698tI599x7n5lMmGfOPfc5mlJKIYQQQgghxBCkD3QAQgghhBBC9BdJdoUQQgghxJAlya4QQgghhBiyJNkVQgghhBBDliS7QgghhBBiyJJkVwghhBBCDFmS7AohhBBCiCFLkl0hhBBCCDFkSbIrhBBCCCGGLEl2xaCzdOlSNE1D0zSWL1/eabtSilGjRqFpGrNmzerTcxcVFXHhhRfGHi9fvrzbOPrbhRdeiM/n2+/nPVBpmsYtt9wy0GHE7Hwfb926daBD6RNd/S1ceOGFFBUVDVhMPfXAAw8watQonE4nmqbR1NQ00CEJIfYjSXbFoJWYmMhjjz3Wqf2tt95i8+bNJCYm9nsM06ZNY8WKFUybNq3fzyXEgebGG2/khRdeGOgw9mjt2rVcddVVzJ49mzfffJMVK1bsl387hBCDhznQAQjRnXPOOYenn36a3//+9yQlJcXaH3vsMWbMmEFLS0u/x5CUlMRRRx3V7+cR4kA0cuTIgQ5hrz777DMAvv/973PEEUcMcDRCiIEgI7ti0Dr33HMBeOaZZ2Jtzc3N/O1vf+Oiiy7qcp9wOMztt9/O2LFjcblcZGZmsnDhQmpra+P6RSIRfv7zn5OTk4PX6+XYY4/lww8/7HS8ri7drlq1iu9+97sUFRXh8XgoKiri3HPPZdu2bXH77ryMvWzZMn7wgx+QkZFBeno6Z555Jtu3b+/x6/Dll18yd+5cfD4fBQUF/OQnPyEUCsX1aWho4PLLLyc/Px+n08mIESO44YYb4vpt3boVTdNYunRpp3PsPgWgtraW//3f/6WgoCD2Oh5zzDH85z//idvvP//5D3PmzCEpKQmv18sxxxzDG2+8scfnU1tbi9Pp5MYbb+y0bePGjWiaxm9/+9tY38svv5zx48fj8/nIysrihBNO4L///e/eXjZuueUWNE3r1N7d9ILnnnuOGTNmkJCQgM/n45RTTmHNmjV7PQ/A+++/zzHHHIPb7SYvL49FixYRiUS67NuT8+ycwvLZZ58xZ84cEhISyMzM5Morr6S9vT2ur1KKBx98kKlTp+LxeEhNTeWss85iy5Ytcf1mzZrFxIkTWblyJccddxxer5cRI0Zw5513Ytt2XN+NGzfyjW98A6/XS0ZGBpdddhmtra2dnktX0xg0TePKK6/kT3/6E+PGjcPr9TJlyhT++c9/dtr/H//4B5MnT8blcjFixAjuv//+bn9vXXn88ceZMmUKbrebtLQ0zjjjDDZs2BD3nC+44AIAjjzySDRNi5umJIQ4OEiyKwatpKQkzjrrLB5//PFY2zPPPIOu65xzzjmd+tu2zemnn86dd97JeeedxyuvvMKdd97J66+/zqxZswgEArG+3//+97n33nuZP38+//jHP/jOd77DmWeeSWNj417j2rp1K2PGjOG+++7j3//+N3fddReVlZVMnz6durq6Tv0vueQSHA4Hf/7zn7n77rtZvnx57AN4byKRCN/61reYM2cO//jHP7jooov4v//7P+66665Yn2AwyOzZs3nyySe55ppreOWVV7jgggu4++67OfPMM3t0nt1973vf48UXX+Smm27itdde449//CMnnngi9fX1sT5PPfUUJ598MklJSTzxxBP85S9/IS0tjVNOOWWPCW9mZibf/OY3eeKJJzolWUuWLMHpdHL++ecDHUk8wM0338wrr7zCki
VLGDFiBLNmzerTedR33HEH5557LuPHj+cvf/kLf/rTn2htbeW4445j/fr1e9x3/fr1zJkzh6amJpYuXcrDDz/MmjVruP3227/WeSKRCHPnzmXOnDm8+OKLXHnllfzhD3/o9N6/9NJLufrqqznxxBN58cUXefDBB/nss884+uijqa6ujutbVVXF+eefzwUXXMBLL73EqaeeyqJFi3jqqadifaqrq5k5cybr1q3jwQcf5E9/+hN+v58rr7yyx6/nK6+8wu9+9ztuu+02/va3v8US0V0T8FdffZUzzzyT9PR0nnvuOe6++26eeeYZnnjiiR6dY/HixVx88cVMmDCBv//979x///188sknzJgxgy+++AKABx98kF/84hdAx3trxYoVXX7JEkIMcUqIQWbJkiUKUCtXrlTLli1TgFq3bp1SSqnp06erCy+8UCml1IQJE9TMmTNj+z3zzDMKUH/729/ijrdy5UoFqAcffFAppdSGDRsUoH784x/H9Xv66acVoBYsWBBr23n+ZcuWdRtvNBpVfr9fJSQkqPvvv7/T87j88svj+t99990KUJWVlXt8HRYsWKAA9Ze//CWufe7cuWrMmDGxxw8//HCX/e666y4FqNdee00ppVRJSYkC1JIlSzqdC1A333xz7LHP51NXX311t7G1tbWptLQ0ddppp8W1W5alpkyZoo444og9PreXXnopLjalOl7HvLw89Z3vfKfb/aLRqIpEImrOnDnqjDPO2ONzuPnmm1VX/8Tt/L2UlJQopZQqLS1VpmmqH/7wh3H9WltbVU5Ojjr77LP3+FzOOecc5fF4VFVVVVycY8eO7fV5dv7ud30/KaXUr371KwWod955Ryml1IoVKxSgfv3rX8f1KysrUx6PR/385z+Ptc2cOVMB6oMPPojrO378eHXKKafEHl977bVK0zS1du3auH4nnXRSp7+FBQsWqMLCwrh+gMrOzlYtLS2xtqqqKqXrulq8eHGsbfr06aqgoECFQqG41yI9Pb3L39uuGhsblcfjUXPnzo1rLy0tVS6XS5133nmxtl3/PRFCHJxkZFcMajNnzmTkyJE8/vjjfPrpp6xcubLbKQz//Oc/SUlJ4bTTTiMajcZ+pk6dSk5OTmwkcNmyZQCx0cOdzj77bExz79PY/X4/1157LaNGjcI0TUzTxOfz0dbWFncJdadvfetbcY8nT54M0GnaQ1c0TeO0007rtP+u+7755pskJCRw1llnxfXbebl2b9MKunLEEUewdOlSbr/9dt5///1Ol+Tfe+89GhoaWLBgQdxrbds23/jGN1i5ciVtbW3dHv/UU08lJyeHJUuWxNr+/e9/s3379k6/34cffphp06bhdrsxTROHw8Ebb7zR5WvdG//+97+JRqPMnz8/7rm43W5mzpy51xHkZcuWMWfOHLKzs2NthmF0GoHtzXl2f4+ed955sXNCx3te0zQuuOCCuGPm5OQwZcqUTsfMycnpNG919/fTsmXLmDBhAlOmTOny3D0xe/bsuJvAsrOzycrKip2nra2NVatW8e1vfxun0xnr5/P5Or3fu7JixQoCgUCnKQkFBQWccMIJvXrPCyGGLrlBTQxqmqaxcOFCfvvb3xIMBhk9ejTHHXdcl32rq6tpamqK+/Dc1c4pBjsvxefk5MRtN02T9PT0vcZ03nnn8cYbb3DjjTcyffp0kpKS0DSNuXPnxk2V2Gn3Y7pcLoAu++7O6/Xidrs77R8MBmOP6+vrycnJ6TTPMSsrC9M046Ye9NRzzz3H7bffzh//+EduvPFGfD4fZ5xxBnfffTc5OTmxy+O7J9i7amhoICEhocttpmnyve99jwceeICmpiZSUlJYunQpubm5nHLKKbF+v/nNb/jJT37CZZddxi9/+UsyMjIwDIMbb7yxz5Ldnc9l+vTpXW7X9T2PCex8/Xe3e9u+nqer9+POY+78nVZXV6OUiku0dzVixIi4x129v10uV9x7sb6+nuLi4r0+nz3Z23kaGx
u7jbu757Krnc8/Nze307a8vDxef/31HscqhBj6JNkVg96FF17ITTfdxMMPP8yvfvWrbvvtvAHs1Vdf7XL7zpGmnR/
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" data=cal_housing.iloc[indices],\n",
|
|||
|
" x=\"Longitude\",\n",
|
|||
|
" y=\"Latitude\",\n",
|
|||
|
" size=\"MedHouseVal\",\n",
|
|||
|
" hue=\"MedHouseVal\",\n",
|
|||
|
" palette=\"flare\",\n",
|
|||
|
" alpha=0.5,\n",
|
|||
|
")\n",
|
|||
|
"plt.legend(title=\"MedHouseVal\", bbox_to_anchor=(1.05, 1), loc=\"upper left\")\n",
|
|||
|
"_ = plt.title(\"Median house value depending of\\n their spatial location\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"id": "cd13a20a-914e-4cfd-ac46-d96b078c845f",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjoAAAXECAYAAAB0rtelAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8g+/7EAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUZfbA8e+dmplMSW8QCL0LIkgXEBVQUOy9YG9r3VV3dffnupZ117U3rGDvYBcRkSZFegm9BQikJzOT6TP398dAYMgMooZMEs7neXh2c987M2fAN3Pnnvc9R1FVVUUIIYQQQgghhBBCCCGEEKIZ0iQ6ACGEEEIIIYQQQgghhBBCiN9LEh1CCCGEEEIIIYQQQgghhGi2JNEhhBBCCCGEEEIIIYQQQohmSxIdQgghhBBCCCGEEEIIIYRotiTRIYQQQgghhBBCCCGEEEKIZksSHUIIIYQQQgghhBBCCCGEaLYk0SGEEEIIIYQQQgghhBBCiGZLEh1CCCGEEEIIIYQQQgghhGi2JNFxhFRVxeFwoKpqokMRQsQh81SI5kHmqhDNg8xVIZo+madCNA8yV4UQ4uiTRMcRcjqd2O12nE5nokMRQsQh81SI5kHmqhDNg8xVIZo+madCNA8yV4UQ4uiTRIcQQgghhBBCCCGEEEIIIZotSXQIIYQQQgghhBBCCCGEEKLZkkSHEEIIIYQQQgghhBBCCCGaLUl0CCGEEEIIIYQQQgghhBCi2ZJEhxBCiGYh4AsQ8AUSHYYQUVRVxe/xEwwGEx2KEMeEcCiM3+MnFAolOhQhxFEUDAbxe/yJDkOIY8b+a9pQUD5fhRDNly7RAQghhBCHU1NWQ9Hancz/eD4AQ84bTJuebbBn2hMcmTjWVRZXsmb2Glb9uBpbhpWTLjmJrDZZmO3mRIcmRIsTDASpLK5k0bRF7FhTRF6nXAafN5j0VunojfpEhyeEaCC1NbWUbi9lzntzcVW5OO7kXvQ4qQdpeWmJDk2IFklVVSqLK1n5w0oK560jNTuFYZecRGZ+BiarKdHhCSHEbyKJDiGEEE1WTVkNU+55i02/bKo7tnb2Wjr268BV/7kKe5YkO0RilBWV8dTlT+OscNYd++XLJZzxp9MZfslw+WIoRAPbsWoHz1/7AsFAZPfUhgUbmP3uHG54/nq6DOqCVqdNcIRCiD/K7XAz662fmD5pet2x9T+vx5Y5nTvfuoOM/IwERidEy1S6rZQnL38Kd4277tjCaYs4775zGXDOAJLMSQmMTgghfhspXSWEEKLJ2vTL5qgkx36bl2yJeVyIxuCt9fL5/z6PSnLs9/Vz31BTVpOAqIRouapLq5l875S6JMd+4VCYyfdOwVHmSFBkQoiGVFNaE5Xk2M9R5uCr57/G5/YlICohWi63w81Hj34cleTY79PHP8NZXv9aVwghmjJJdAghhGiS3A43c96fE3d8zvtzcTvqX5QLcbTVVrtZNWt13PF189c3YjRCtHyuShfVe6tjjnkcHmrKJdEhREuw+qc1cceWT19ObU1tI0YjRMtXW13LxoUbY46pqsrW5VsbOSIhhPhjJNHRgvi2rsW99EdUVU10KEII8YepYZVQIH4zvKA/iBqW33ei8amqetj/9gLeQCNGI0TL92u/68PSOFWIFiHgi//5GQ6GQS77hGhQv3bvKOALHnZcCCGaGkl0tBBqOEzN55Nwzf6M4N4diQ5HCCH+MLPdTP9x/eOOnzi+vzR9FglhsibRoW+HuOPdhnZrxGiEaPksqRbMtti/7/VGvfRrEqKF6Dm8R9yxroO6kGSRXgFCNCST1USrLq3ijnc8If71rhBCNEWS6GghguW7QQ0D4N9emOBohBDij1MUhd6jjovZeDK9dTq9T+mNoigJiEwc65LtyZz3t3PRG/X1xvqO7UtqTkrjByVEC2bLtHHB38+POTbhzxOwZdgaOSIhxNGQlpdG71N71zuuT9Iz4S8T4iY8hRC/jz
XNykX/uBCtTltvbMgFg7FmWBMQlRBC/H6KKnWOjojD4cBut1NTU4PN1vS+THnWLsQ5/R20GXlobWmkTLgx0SEJ0eia+jwVv0/VnioWfb6IRZ8vRlVVBpx1IgMmDCQtNzXRoYnfqSXM1WAwSEVRBd+/NoONizaSnJLMqImj6DKws9x0FS1GU5qrXpeXPVv28M0L37Jn8x4y22Qy9uYxtO7aWm5+imNaU5qnDcFR7mDd/PX8OOVH3DVuug7uwinXnEJG64yYN2OFaC6a6lwN+oOUFZUxfdJ0tizbijXdyqnXnkLHfh2xpkmiQwjRvEii4wg11Q+l/Vzzv8Sz+mcMrTsSKCki45p/JjokIRpdU5+n4vcLhULUVtaiApa0ZLRa+aLbnLWkuer3+PE4PWh0GvkyKFqcpjhXPU4Pfo8ffZJeEhxC0DTnaUNwVjoJB8OYbCYMSYZEhyPEH9bU56rP7cPr8qLVa7GkWhIdjhBC/C66RAcgGkaouhyN2YbGmkp40wrUUBBFK/+8QoiWQavVYstsel8IhDCYDBhMcgNGiMZispowWU2JDkMIcZTJ4gEhGpfRbMRoNiY6DCGE+EOkR0cLEXZVozFb0CTbQVUJOaoSHZIQQgghhBBCCCGEEEIIcdRJoqOFCLmq0ZiS0SRHVr6EHeUJjkgIIYQQQgghhBBCCCGEOPok0dFChGudKElmNKZILcWQszqxAQkhhBBCCCGEEEIIIYQQjUASHS2AGvBB0I/GaEbR6lCSkglLokMIIYQQQgghhBBCCCHEMaBFJDrmzJnD+PHjycvLQ1EUpk2bFvfcG264AUVRePrppxstvqMt7HYCoCSZAdCYkgm5qhMYkRBCCCGEEEIIIYQQQgjROFpEoqO2tpbevXvz/PPPH/a8adOmsWjRIvLy8hopssYR9tQCoBiSIv+blEzYVZPIkIQQQgghhBBCCCGEEEKIRqFLdAANYezYsYwdO/aw5+zevZtbb72V6dOnc8YZZzRSZI1jf6JDYzRF/tckpauEEEIIIYQQQgghhBBCHBtaRKLj14TDYS6//HL+8pe/0KNHjyN6jM/nw+fz1f3scDiOVnh/mOo9dEeHmXBJUSJDEqJRNKd5KsSxTOaqEM2DzFUhmj6Zp0I0DzJXhRCi8bWI0lW/5vHHH0en03Hbbbcd8WMee+wx7HZ73Z/8/PyjGOEfE/bWgkYL2kjeSpNkJuxxoarhBEcmxNHVnOapEMcymatCNA8yV4Vo+mSeCtE8yFwVQojGp6iqqiY6iIakKApTp05lwoQJACxdupQzzjiDZcuW1fXmKCgo4I477uCOO+6I+zyxsu/5+fnU1NRgs9mO5lv4zWoXfot72U/Yx18DgH/XZtwLviHjpn+jMVkSHJ0QR09zmqdCHMtkrgrRPMhcFaLpk3kqRPMgc1UIIRpfiy9dNXfuXEpLS2nTpk3dsVAoxN13383TTz/N9u3bYz7OaDRiNBobKco/Jux1oxgOxKpJMkeO1zol0SFatOY0T4U4lslcFaJ5kLkqRNMn81SI5kHmqhBCNL4Wn+i4/PLLOeWUU6KOjR49mssvv5yJEycmKKqGpfo8KPoDH6CKcV+iw+0EchMUlRBCCCGEEEIIIYQQQghx9LWIRIfL5WLz5s11P2/bto0VK1aQlpZGmzZtSE9Pjzpfr9eTk5NDly5dGjvUoyKS6DDU/axJMgEQdkuzKyGEEEIIIYQQQgghhBAtW4tIdCxZsoSRI0fW/XzXXXcBcOWVVzJ58uQERdV4wockOtAZQKMl7HElLighhBBCCCGEEEIIIYQQohG0iETHiBEj+C091eP15WiuVJ8HjcVe97OiKChJZsJuSXQIIZomr8uLs9KJ3+MnyZKELcOG3qhPdFhCHFY4FKamrAa3w41OryM5JRlLqvTCEqKxuKpcuKpqCQWDmG1m7Jl2NFpNosMSQjQxwUCQmlIHXpcHfZIea5oVk9WU6LCEaLI8Tg+uShd+b+S7mT3Ljk7fIm
4XCiGOMfKbqwVQ/V4UXWbUMY3RhCo7OoQQTVDVnio+fvQT1vy0BlVV0SfpOfnKkxl+yUlY062JDk+ImNwON2vnrOW
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1604.61x1500 with 42 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Drop unwanted columns\n",
|
|||
|
"columns_drop = [\"Longitude\", \"Latitude\"]\n",
|
|||
|
"subset = cal_housing.iloc[indices].drop(columns=columns_drop) ## create subset of original dataframe\n",
|
|||
|
"\n",
|
|||
|
"# Quantize the target and keep the midpoint for each interval\n",
|
|||
|
"subset[\"MedHouseVal\"] = pd.qcut(subset[\"MedHouseVal\"], 6, retbins=False) ## qcut subset the MedHouseVal into 6 quantiles --> Categories\n",
|
|||
|
"subset[\"MedHouseVal\"] = subset[\"MedHouseVal\"].apply(lambda x: x.mid)\n",
|
|||
|
"\n",
|
|||
|
"_ = sns.pairplot(data=subset, hue=\"MedHouseVal\", palette=\"flare\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"id": "f20f0730-f457-4169-892e-96169fd8f81e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"## standard score of a sample x is calculated as:\n",
|
|||
|
"## z = (x - u) / s\n",
|
|||
|
"## u = the mean of the training samples or zero if with_mean=False\n",
|
|||
|
"## s = the standard deviation of the training samples or one if with_std=False.\n",
|
|||
|
"\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler ## Standardize features by removing the mean and scaling to unit variance\n",
|
|||
|
"from sklearn.linear_model import LinearRegression ## load LinearModel\n",
|
|||
|
"\n",
|
|||
|
"from sklearn.pipeline import make_pipeline ## pipeline construction of different estimators\n",
|
|||
|
"from sklearn.model_selection import cross_validate \n",
|
|||
|
"\n",
|
|||
|
"#alphas = np.logspace(-3, 1, num=30)\n",
|
|||
|
"model = make_pipeline(StandardScaler(), LinearRegression()) ## \n",
|
|||
|
"\n",
|
|||
|
"cv_results = cross_validate(\n",
|
|||
|
" model,\n",
|
|||
|
" housing.data,\n",
|
|||
|
" housing.target,\n",
|
|||
|
" return_estimator=True\n",
|
|||
|
"\n",
|
|||
|
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"id": "a6b8e46f-e0d2-4c17-8769-53bf15e72ffb",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"R2 score: 0.553 ± 0.062\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"score = cv_results[\"test_score\"]\n",
|
|||
|
"print(f\"R2 score: {score.mean():.3f} ± {score.std():.3f}\") # correlation coefficient that measures the strength of the relationship between two variables. Range: |-1, 1|\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "5f454352-5e3f-4669-9f53-1f765f1e467e",
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.7"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|