1260 lines
45 KiB
Plaintext
1260 lines
45 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "b0c0ae08-2fb5-47f5-a5ce-1a66e35791a4",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Cleaning Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "f9998a78-ae01-4531-b325-637b6d5ee86d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "9516a86a-ed6a-4f79-b631-3195daec258c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df = pd.read_csv('https://gist.githubusercontent.com/maltegrosse/bdfd2c6a5e3bff315d92cd27c2461a48/raw/49d5672953360934601b3d252c9b78121eed10db/data.csv')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "ea25a32c-70d3-479d-8d11-7e487f13f50c",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Duration</th>\n",
|
|
" <th>Date</th>\n",
|
|
" <th>Pulse</th>\n",
|
|
" <th>Maxpulse</th>\n",
|
|
" <th>Calories</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/01'</td>\n",
|
|
" <td>110</td>\n",
|
|
" <td>130</td>\n",
|
|
" <td>409.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/02'</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>145</td>\n",
|
|
" <td>479.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/03'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>135</td>\n",
|
|
" <td>340.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/04'</td>\n",
|
|
" <td>109</td>\n",
|
|
" <td>175</td>\n",
|
|
" <td>282.4</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/05'</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>406.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/06'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>127</td>\n",
|
|
" <td>300.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/07'</td>\n",
|
|
" <td>110</td>\n",
|
|
" <td>136</td>\n",
|
|
" <td>374.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>450</td>\n",
|
|
" <td>'2020/12/08'</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>134</td>\n",
|
|
" <td>253.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>30</td>\n",
|
|
" <td>'2020/12/09'</td>\n",
|
|
" <td>109</td>\n",
|
|
" <td>133</td>\n",
|
|
" <td>195.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/10'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>124</td>\n",
|
|
" <td>269.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/11'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>147</td>\n",
|
|
" <td>329.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/12'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/12'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/13'</td>\n",
|
|
" <td>106</td>\n",
|
|
" <td>128</td>\n",
|
|
" <td>345.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/14'</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>379.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/15'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>123</td>\n",
|
|
" <td>275.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/16'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>215.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/17'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>300.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/18'</td>\n",
|
|
" <td>90</td>\n",
|
|
" <td>112</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/19'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>123</td>\n",
|
|
" <td>323.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/20'</td>\n",
|
|
" <td>97</td>\n",
|
|
" <td>125</td>\n",
|
|
" <td>243.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/21'</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>131</td>\n",
|
|
" <td>364.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>119</td>\n",
|
|
" <td>282.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/23'</td>\n",
|
|
" <td>130</td>\n",
|
|
" <td>101</td>\n",
|
|
" <td>300.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/24'</td>\n",
|
|
" <td>105</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>246.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/25'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>126</td>\n",
|
|
" <td>334.5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>26</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>20201226</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/27'</td>\n",
|
|
" <td>92</td>\n",
|
|
" <td>118</td>\n",
|
|
" <td>241.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>28</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/28'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/29'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>280.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/30'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>129</td>\n",
|
|
" <td>380.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/31'</td>\n",
|
|
" <td>92</td>\n",
|
|
" <td>115</td>\n",
|
|
" <td>243.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Duration Date Pulse Maxpulse Calories\n",
|
|
"0 60 '2020/12/01' 110 130 409.1\n",
|
|
"1 60 '2020/12/02' 117 145 479.0\n",
|
|
"2 60 '2020/12/03' 103 135 340.0\n",
|
|
"3 45 '2020/12/04' 109 175 282.4\n",
|
|
"4 45 '2020/12/05' 117 148 406.0\n",
|
|
"5 60 '2020/12/06' 102 127 300.0\n",
|
|
"6 60 '2020/12/07' 110 136 374.0\n",
|
|
"7 450 '2020/12/08' 104 134 253.3\n",
|
|
"8 30 '2020/12/09' 109 133 195.1\n",
|
|
"9 60 '2020/12/10' 98 124 269.0\n",
|
|
"10 60 '2020/12/11' 103 147 329.3\n",
|
|
"11 60 '2020/12/12' 100 120 250.7\n",
|
|
"12 60 '2020/12/12' 100 120 250.7\n",
|
|
"13 60 '2020/12/13' 106 128 345.3\n",
|
|
"14 60 '2020/12/14' 104 132 379.3\n",
|
|
"15 60 '2020/12/15' 98 123 275.0\n",
|
|
"16 60 '2020/12/16' 98 120 215.2\n",
|
|
"17 60 '2020/12/17' 100 120 300.0\n",
|
|
"18 45 '2020/12/18' 90 112 NaN\n",
|
|
"19 60 '2020/12/19' 103 123 323.0\n",
|
|
"20 45 '2020/12/20' 97 125 243.0\n",
|
|
"21 60 '2020/12/21' 108 131 364.2\n",
|
|
"22 45 NaN 100 119 282.0\n",
|
|
"23 60 '2020/12/23' 130 101 300.0\n",
|
|
"24 45 '2020/12/24' 105 132 246.0\n",
|
|
"25 60 '2020/12/25' 102 126 334.5\n",
|
|
"26 60 20201226 100 120 250.0\n",
|
|
"27 60 '2020/12/27' 92 118 241.0\n",
|
|
"28 60 '2020/12/28' 103 132 NaN\n",
|
|
"29 60 '2020/12/29' 100 132 280.0\n",
|
|
"30 60 '2020/12/30' 102 129 380.3\n",
|
|
"31 60 '2020/12/31' 92 115 243.0"
|
|
]
|
|
},
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "2baf29d8-cd8f-4dfd-931a-c413a995320e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Duration</th>\n",
|
|
" <th>Date</th>\n",
|
|
" <th>Pulse</th>\n",
|
|
" <th>Maxpulse</th>\n",
|
|
" <th>Calories</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/01'</td>\n",
|
|
" <td>110</td>\n",
|
|
" <td>130</td>\n",
|
|
" <td>409.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/02'</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>145</td>\n",
|
|
" <td>479.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/03'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>135</td>\n",
|
|
" <td>340.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/04'</td>\n",
|
|
" <td>109</td>\n",
|
|
" <td>175</td>\n",
|
|
" <td>282.4</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/05'</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>406.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/06'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>127</td>\n",
|
|
" <td>300.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/07'</td>\n",
|
|
" <td>110</td>\n",
|
|
" <td>136</td>\n",
|
|
" <td>374.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>450</td>\n",
|
|
" <td>'2020/12/08'</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>134</td>\n",
|
|
" <td>253.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>30</td>\n",
|
|
" <td>'2020/12/09'</td>\n",
|
|
" <td>109</td>\n",
|
|
" <td>133</td>\n",
|
|
" <td>195.1</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/10'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>124</td>\n",
|
|
" <td>269.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/11'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>147</td>\n",
|
|
" <td>329.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/12'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/12'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.7</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/13'</td>\n",
|
|
" <td>106</td>\n",
|
|
" <td>128</td>\n",
|
|
" <td>345.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/14'</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>379.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/15'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>123</td>\n",
|
|
" <td>275.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/16'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>215.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/17'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>300.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/19'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>123</td>\n",
|
|
" <td>323.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/20'</td>\n",
|
|
" <td>97</td>\n",
|
|
" <td>125</td>\n",
|
|
" <td>243.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/21'</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>131</td>\n",
|
|
" <td>364.2</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/23'</td>\n",
|
|
" <td>130</td>\n",
|
|
" <td>101</td>\n",
|
|
" <td>300.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/24'</td>\n",
|
|
" <td>105</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>246.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/25'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>126</td>\n",
|
|
" <td>334.5</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>26</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>20201226</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/27'</td>\n",
|
|
" <td>92</td>\n",
|
|
" <td>118</td>\n",
|
|
" <td>241.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/29'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>280.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/30'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>129</td>\n",
|
|
" <td>380.3</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/31'</td>\n",
|
|
" <td>92</td>\n",
|
|
" <td>115</td>\n",
|
|
" <td>243.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Duration Date Pulse Maxpulse Calories\n",
|
|
"0 60 '2020/12/01' 110 130 409.1\n",
|
|
"1 60 '2020/12/02' 117 145 479.0\n",
|
|
"2 60 '2020/12/03' 103 135 340.0\n",
|
|
"3 45 '2020/12/04' 109 175 282.4\n",
|
|
"4 45 '2020/12/05' 117 148 406.0\n",
|
|
"5 60 '2020/12/06' 102 127 300.0\n",
|
|
"6 60 '2020/12/07' 110 136 374.0\n",
|
|
"7 450 '2020/12/08' 104 134 253.3\n",
|
|
"8 30 '2020/12/09' 109 133 195.1\n",
|
|
"9 60 '2020/12/10' 98 124 269.0\n",
|
|
"10 60 '2020/12/11' 103 147 329.3\n",
|
|
"11 60 '2020/12/12' 100 120 250.7\n",
|
|
"12 60 '2020/12/12' 100 120 250.7\n",
|
|
"13 60 '2020/12/13' 106 128 345.3\n",
|
|
"14 60 '2020/12/14' 104 132 379.3\n",
|
|
"15 60 '2020/12/15' 98 123 275.0\n",
|
|
"16 60 '2020/12/16' 98 120 215.2\n",
|
|
"17 60 '2020/12/17' 100 120 300.0\n",
|
|
"19 60 '2020/12/19' 103 123 323.0\n",
|
|
"20 45 '2020/12/20' 97 125 243.0\n",
|
|
"21 60 '2020/12/21' 108 131 364.2\n",
|
|
"23 60 '2020/12/23' 130 101 300.0\n",
|
|
"24 45 '2020/12/24' 105 132 246.0\n",
|
|
"25 60 '2020/12/25' 102 126 334.5\n",
|
|
"26 60 20201226 100 120 250.0\n",
|
|
"27 60 '2020/12/27' 92 118 241.0\n",
|
|
"29 60 '2020/12/29' 100 132 280.0\n",
|
|
"30 60 '2020/12/30' 102 129 380.3\n",
|
|
"31 60 '2020/12/31' 92 115 243.0"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# drop null/NaN\n",
|
|
"new_df = df.dropna()\n",
|
|
"new_df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "86df1d5f-639e-4f0b-8576-eb4a9dbee188",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|
"Index: 29 entries, 0 to 31\n",
|
|
"Data columns (total 5 columns):\n",
|
|
" # Column Non-Null Count Dtype \n",
|
|
"--- ------ -------------- ----- \n",
|
|
" 0 Duration 29 non-null int64 \n",
|
|
" 1 Date 29 non-null object \n",
|
|
" 2 Pulse 29 non-null int64 \n",
|
|
" 3 Maxpulse 29 non-null int64 \n",
|
|
" 4 Calories 29 non-null float64\n",
|
|
"dtypes: float64(1), int64(3), object(1)\n",
|
|
"memory usage: 1.4+ KB\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"df.locnew_df.info()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "37533007-2851-49da-8fca-2e9d3b74c406",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# hint df.dropna(inplace = True) <- manipulates orginal df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "e94f0608-1928-4dec-b28c-3f56d72b1867",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# fill missing values\n",
|
|
"# df.fillna(130, inplace = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "025cec14-2687-4ec5-9fa9-f10f1da927ea",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/tmp/ipykernel_733/185300893.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
|
|
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
|
|
"\n",
|
|
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
|
|
"\n",
|
|
"\n",
|
|
" df[\"Calories\"].fillna(x, inplace=True)\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Duration</th>\n",
|
|
" <th>Date</th>\n",
|
|
" <th>Pulse</th>\n",
|
|
" <th>Maxpulse</th>\n",
|
|
" <th>Calories</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/01'</td>\n",
|
|
" <td>110</td>\n",
|
|
" <td>130</td>\n",
|
|
" <td>409.10</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/02'</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>145</td>\n",
|
|
" <td>479.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/03'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>135</td>\n",
|
|
" <td>340.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/04'</td>\n",
|
|
" <td>109</td>\n",
|
|
" <td>175</td>\n",
|
|
" <td>282.40</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/05'</td>\n",
|
|
" <td>117</td>\n",
|
|
" <td>148</td>\n",
|
|
" <td>406.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/06'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>127</td>\n",
|
|
" <td>300.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/07'</td>\n",
|
|
" <td>110</td>\n",
|
|
" <td>136</td>\n",
|
|
" <td>374.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>450</td>\n",
|
|
" <td>'2020/12/08'</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>134</td>\n",
|
|
" <td>253.30</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>8</th>\n",
|
|
" <td>30</td>\n",
|
|
" <td>'2020/12/09'</td>\n",
|
|
" <td>109</td>\n",
|
|
" <td>133</td>\n",
|
|
" <td>195.10</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>9</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/10'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>124</td>\n",
|
|
" <td>269.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>10</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/11'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>147</td>\n",
|
|
" <td>329.30</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>11</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/12'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.70</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>12</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/12'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.70</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>13</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/13'</td>\n",
|
|
" <td>106</td>\n",
|
|
" <td>128</td>\n",
|
|
" <td>345.30</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>14</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/14'</td>\n",
|
|
" <td>104</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>379.30</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>15</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/15'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>123</td>\n",
|
|
" <td>275.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>16</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/16'</td>\n",
|
|
" <td>98</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>215.20</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>17</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/17'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>300.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>18</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/18'</td>\n",
|
|
" <td>90</td>\n",
|
|
" <td>112</td>\n",
|
|
" <td>304.68</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>19</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/19'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>123</td>\n",
|
|
" <td>323.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>20</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/20'</td>\n",
|
|
" <td>97</td>\n",
|
|
" <td>125</td>\n",
|
|
" <td>243.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>21</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/21'</td>\n",
|
|
" <td>108</td>\n",
|
|
" <td>131</td>\n",
|
|
" <td>364.20</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>22</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>119</td>\n",
|
|
" <td>282.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>23</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/23'</td>\n",
|
|
" <td>130</td>\n",
|
|
" <td>101</td>\n",
|
|
" <td>300.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>24</th>\n",
|
|
" <td>45</td>\n",
|
|
" <td>'2020/12/24'</td>\n",
|
|
" <td>105</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>246.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>25</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/25'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>126</td>\n",
|
|
" <td>334.50</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>26</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>20201226</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>120</td>\n",
|
|
" <td>250.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>27</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/27'</td>\n",
|
|
" <td>92</td>\n",
|
|
" <td>118</td>\n",
|
|
" <td>241.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>28</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/28'</td>\n",
|
|
" <td>103</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>304.68</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>29</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/29'</td>\n",
|
|
" <td>100</td>\n",
|
|
" <td>132</td>\n",
|
|
" <td>280.00</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>30</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/30'</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>129</td>\n",
|
|
" <td>380.30</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>31</th>\n",
|
|
" <td>60</td>\n",
|
|
" <td>'2020/12/31'</td>\n",
|
|
" <td>92</td>\n",
|
|
" <td>115</td>\n",
|
|
" <td>243.00</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Duration Date Pulse Maxpulse Calories\n",
|
|
"0 60 '2020/12/01' 110 130 409.10\n",
|
|
"1 60 '2020/12/02' 117 145 479.00\n",
|
|
"2 60 '2020/12/03' 103 135 340.00\n",
|
|
"3 45 '2020/12/04' 109 175 282.40\n",
|
|
"4 45 '2020/12/05' 117 148 406.00\n",
|
|
"5 60 '2020/12/06' 102 127 300.00\n",
|
|
"6 60 '2020/12/07' 110 136 374.00\n",
|
|
"7 450 '2020/12/08' 104 134 253.30\n",
|
|
"8 30 '2020/12/09' 109 133 195.10\n",
|
|
"9 60 '2020/12/10' 98 124 269.00\n",
|
|
"10 60 '2020/12/11' 103 147 329.30\n",
|
|
"11 60 '2020/12/12' 100 120 250.70\n",
|
|
"12 60 '2020/12/12' 100 120 250.70\n",
|
|
"13 60 '2020/12/13' 106 128 345.30\n",
|
|
"14 60 '2020/12/14' 104 132 379.30\n",
|
|
"15 60 '2020/12/15' 98 123 275.00\n",
|
|
"16 60 '2020/12/16' 98 120 215.20\n",
|
|
"17 60 '2020/12/17' 100 120 300.00\n",
|
|
"18 45 '2020/12/18' 90 112 304.68\n",
|
|
"19 60 '2020/12/19' 103 123 323.00\n",
|
|
"20 45 '2020/12/20' 97 125 243.00\n",
|
|
"21 60 '2020/12/21' 108 131 364.20\n",
|
|
"22 45 NaN 100 119 282.00\n",
|
|
"23 60 '2020/12/23' 130 101 300.00\n",
|
|
"24 45 '2020/12/24' 105 132 246.00\n",
|
|
"25 60 '2020/12/25' 102 126 334.50\n",
|
|
"26 60 20201226 100 120 250.00\n",
|
|
"27 60 '2020/12/27' 92 118 241.00\n",
|
|
"28 60 '2020/12/28' 103 132 304.68\n",
|
|
"29 60 '2020/12/29' 100 132 280.00\n",
|
|
"30 60 '2020/12/30' 102 129 380.30\n",
|
|
"31 60 '2020/12/31' 92 115 243.00"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"x = df[\"Calories\"].mean()\n",
|
|
"\n",
|
|
"df[\"Calories\"].fillna(x, inplace=True)\n",
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "d2e87f3b-ef58-4128-b52f-799056e56de8",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/tmp/ipykernel_733/2663698494.py:3: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
|
|
"The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
|
|
"\n",
|
|
"For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
|
|
"\n",
|
|
"\n",
|
|
" df[\"Calories\"].fillna(x, inplace = True)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"x = df[\"Calories\"].median()\n",
|
|
"\n",
|
|
"df[\"Calories\"].fillna(x, inplace = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "c42df786-aa1b-4174-b436-566421f1683b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"ename": "ValueError",
|
|
"evalue": "time data \"20201226\" doesn't match format \"'%Y/%m/%d'\", at position 26. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.",
|
|
"output_type": "error",
|
|
"traceback": [
|
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
|
"Cell \u001b[0;32mIn[9], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# convert into proper data type\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDate\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mDate\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m df\n",
|
|
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:1067\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1065\u001b[0m result \u001b[38;5;241m=\u001b[39m arg\u001b[38;5;241m.\u001b[39mmap(cache_array)\n\u001b[1;32m 1066\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1067\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_values\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1068\u001b[0m result \u001b[38;5;241m=\u001b[39m arg\u001b[38;5;241m.\u001b[39m_constructor(values, index\u001b[38;5;241m=\u001b[39marg\u001b[38;5;241m.\u001b[39mindex, name\u001b[38;5;241m=\u001b[39marg\u001b[38;5;241m.\u001b[39mname)\n\u001b[1;32m 1069\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, (ABCDataFrame, abc\u001b[38;5;241m.\u001b[39mMutableMapping)):\n",
|
|
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:433\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 431\u001b[0m \u001b[38;5;66;03m# `format` could be inferred, or user didn't ask for mixed-format parsing.\u001b[39;00m\n\u001b[1;32m 432\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 433\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_array_strptime_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 435\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m objects_to_datetime64(\n\u001b[1;32m 436\u001b[0m arg,\n\u001b[1;32m 437\u001b[0m dayfirst\u001b[38;5;241m=\u001b[39mdayfirst,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 441\u001b[0m allow_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 442\u001b[0m )\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 445\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 446\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n",
|
|
"File \u001b[0;32m/opt/conda/lib/python3.11/site-packages/pandas/core/tools/datetimes.py:467\u001b[0m, in \u001b[0;36m_array_strptime_with_fallback\u001b[0;34m(arg, name, utc, fmt, exact, errors)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_array_strptime_with_fallback\u001b[39m(\n\u001b[1;32m 457\u001b[0m arg,\n\u001b[1;32m 458\u001b[0m name,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 462\u001b[0m errors: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 463\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Index:\n\u001b[1;32m 464\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;124;03m Call array_strptime, with fallback behavior depending on 'errors'.\u001b[39;00m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 467\u001b[0m result, tz_out \u001b[38;5;241m=\u001b[39m \u001b[43marray_strptime\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_out \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 469\u001b[0m unit \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime_data(result\u001b[38;5;241m.\u001b[39mdtype)[\u001b[38;5;241m0\u001b[39m]\n",
|
|
"File \u001b[0;32mstrptime.pyx:501\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n",
|
|
"File \u001b[0;32mstrptime.pyx:451\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n",
|
|
"File \u001b[0;32mstrptime.pyx:583\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime._parse_with_format\u001b[0;34m()\u001b[0m\n",
|
|
"\u001b[0;31mValueError\u001b[0m: time data \"20201226\" doesn't match format \"'%Y/%m/%d'\", at position 26. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this."
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# convert into proper data type\n",
|
|
"df['Date'] = pd.to_datetime(df['Date'])\n",
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "6508edc2-f7f1-469b-a094-1b6c98a155e3",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# remove missing value according to a column\n",
|
|
"# df.dropna(subset=['Date'], inplace = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "725032e8-c03e-428e-a928-f5c2533a3446",
|
|
"metadata": {},
|
|
"source": [
|
|
"#### Fixing Wrong Data"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3367d5c9-90f8-4fb1-9c2b-bae2bdaeb7bf",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# row 7: 450 duration!\n",
|
|
"df.loc[7, 'Duration'] = 45"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "1a9ce891-9275-4539-a23c-4826fb258c1d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "7888f644-60a5-41e2-bd9f-acf1f5e08f5d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# remove duplicates row 11 & 12\n",
|
|
"print(df.duplicated())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "ff4ee9a2-dabb-4015-8b0c-5527f688bb21",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.drop_duplicates(inplace = True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "44165eb4-ab0c-4be0-92d6-4c8ccf2ff389",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "3033c2a4-18f1-4fcd-be75-f71f95c9097f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"df.to_csv('cleaned.csv')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "549ea6b3-3903-4b74-88ad-74c60e7d862e",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|