'pandas使用'

2019-05-16 11:59:06 +08:00 · 2019-05-16 11:59:06 +08:00 · 43c147dad1
parent aa359771ff
commit 43c147dad1
15 changed files with 22810 additions and 0 deletions
--- a/Day76-90/02.Pandas的应用.md
+++ b/Day76-90/02.Pandas的应用.md
@ -1,2 +1,25 @@
 ## Pandas的应用

+### 1、pandas入门
+
+### 2、pandas索引
+
+### 3、pandas数据清洗之空数据
+
+[数据挖掘之空数据处理（有史以来最全面）](https://blog.csdn.net/Soft_Po/article/details/89302887)
+
+### 4、pandas多层索引
+
+### 5、pandas多层索引计算
+
+### 6、pandas数据集成concat
+
+### 7、pandas数据集成merge
+
+### 8、pandas分组聚合操作
+
+### 9、pandas数据集成实战
+
+### 10、美国大选项目
+
+[2012美国大选政治献金项目数据分析（有史以来最全面）](https://blog.csdn.net/Soft_Po/article/details/89283382)
--- a/Day76-90/code/1-pandas入门.ipynb
+++ b/Day76-90/code/1-pandas入门.ipynb
@ -0,0 +1,631 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pandas import Series,DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Math       120\n",
+       "Python     136\n",
+       "En         128\n",
+       "Chinese     99\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 创建\n",
+    "# Series是一维的数据\n",
+    "s = Series(data = [120,136,128,99],index = ['Math','Python','En','Chinese'])\n",
+    "s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4,)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([120, 136, 128,  99], dtype=int64)"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "v = s.values\n",
+    "v"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.ndarray"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "type(v)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "120.75"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "136"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s.max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "15.903353943953666"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s.std()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Math       14400\n",
+       "Python     18496\n",
+       "En         16384\n",
+       "Chinese     9801\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "s.pow(2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Python</th>\n",
+       "      <th>En</th>\n",
+       "      <th>Math</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>a</th>\n",
+       "      <td>113</td>\n",
+       "      <td>116</td>\n",
+       "      <td>75</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>b</th>\n",
+       "      <td>19</td>\n",
+       "      <td>145</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>c</th>\n",
+       "      <td>57</td>\n",
+       "      <td>107</td>\n",
+       "      <td>113</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>d</th>\n",
+       "      <td>95</td>\n",
+       "      <td>3</td>\n",
+       "      <td>66</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>e</th>\n",
+       "      <td>28</td>\n",
+       "      <td>121</td>\n",
+       "      <td>120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>f</th>\n",
+       "      <td>141</td>\n",
+       "      <td>85</td>\n",
+       "      <td>132</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h</th>\n",
+       "      <td>124</td>\n",
+       "      <td>39</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>i</th>\n",
+       "      <td>80</td>\n",
+       "      <td>35</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>j</th>\n",
+       "      <td>68</td>\n",
+       "      <td>99</td>\n",
+       "      <td>31</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>k</th>\n",
+       "      <td>74</td>\n",
+       "      <td>12</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Python   En  Math\n",
+       "a     113  116    75\n",
+       "b      19  145    23\n",
+       "c      57  107   113\n",
+       "d      95    3    66\n",
+       "e      28  121   120\n",
+       "f     141   85   132\n",
+       "h     124   39    10\n",
+       "i      80   35    17\n",
+       "j      68   99    31\n",
+       "k      74   12    11"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# DataFrame是二维的数据\n",
+    "# 和Excel非常相似\n",
+    "# 所有进行数据分析、数据挖掘的工具最基础的结构：行和列，行表示样本，列表示属性\n",
+    "df = DataFrame(data = np.random.randint(0,150,size = (10,3)),index = list('abcdefhijk'),columns=['Python','En','Math'])\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(10, 3)"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[113, 116,  75],\n",
+       "       [ 19, 145,  23],\n",
+       "       [ 57, 107, 113],\n",
+       "       [ 95,   3,  66],\n",
+       "       [ 28, 121, 120],\n",
+       "       [141,  85, 132],\n",
+       "       [124,  39,  10],\n",
+       "       [ 80,  35,  17],\n",
+       "       [ 68,  99,  31],\n",
+       "       [ 74,  12,  11]])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "v = df.values\n",
+    "v"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Python    79.9\n",
+       "En        76.2\n",
+       "Math      59.8\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Python    141\n",
+       "En        145\n",
+       "Math      132\n",
+       "dtype: int32"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Python</th>\n",
+       "      <th>En</th>\n",
+       "      <th>Math</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>a</th>\n",
+       "      <td>113</td>\n",
+       "      <td>116</td>\n",
+       "      <td>75</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>b</th>\n",
+       "      <td>19</td>\n",
+       "      <td>145</td>\n",
+       "      <td>23</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>c</th>\n",
+       "      <td>57</td>\n",
+       "      <td>107</td>\n",
+       "      <td>113</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>d</th>\n",
+       "      <td>95</td>\n",
+       "      <td>3</td>\n",
+       "      <td>66</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>e</th>\n",
+       "      <td>28</td>\n",
+       "      <td>121</td>\n",
+       "      <td>120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>f</th>\n",
+       "      <td>141</td>\n",
+       "      <td>85</td>\n",
+       "      <td>132</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>h</th>\n",
+       "      <td>124</td>\n",
+       "      <td>39</td>\n",
+       "      <td>10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>i</th>\n",
+       "      <td>80</td>\n",
+       "      <td>35</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>j</th>\n",
+       "      <td>68</td>\n",
+       "      <td>99</td>\n",
+       "      <td>31</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>k</th>\n",
+       "      <td>74</td>\n",
+       "      <td>12</td>\n",
+       "      <td>11</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   Python   En  Math\n",
+       "a     113  116    75\n",
+       "b      19  145    23\n",
+       "c      57  107   113\n",
+       "d      95    3    66\n",
+       "e      28  121   120\n",
+       "f     141   85   132\n",
+       "h     124   39    10\n",
+       "i      80   35    17\n",
+       "j      68   99    31\n",
+       "k      74   12    11"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Python    79.9\n",
+       "En        76.2\n",
+       "Math      59.8\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.mean(axis = 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "a    101.333333\n",
+       "b     62.333333\n",
+       "c     92.333333\n",
+       "d     54.666667\n",
+       "e     89.666667\n",
+       "f    119.333333\n",
+       "h     57.666667\n",
+       "i     44.000000\n",
+       "j     66.000000\n",
+       "k     32.333333\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.mean(axis = 1)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Day76-90/code/2-pandas-索引.ipynb
+++ b/Day76-90/code/2-pandas-索引.ipynb
--- a/Day76-90/code/3-pandas数据清洗之空数据.ipynb
+++ b/Day76-90/code/3-pandas数据清洗之空数据.ipynb
--- a/Day76-90/code/4-pandas多层索引.ipynb
+++ b/Day76-90/code/4-pandas多层索引.ipynb
@ -0,0 +1,568 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "import pandas as pd\n",
+    "# 数据分析BI-------->人工智能AI\n",
+    "# 数据分析和数据挖掘一个意思，\n",
+    "# 工具和软件：Excel 免费版\n",
+    "# SPSS（一人一年10000）、SAS（一人一年5000）、Matlab 收费\n",
+    "# R、Python（全方位语言，流行） 免费\n",
+    "# Python + numpy + scipy + pandas + matplotlib + seaborn + pyecharts + sklearn + keras（TensorFlow）+ ……\n",
+    "# 代码，自动化（数据输入----输出结果）\n",
+    "from pandas import Series,DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "a     63\n",
+       "b    107\n",
+       "c     16\n",
+       "d     35\n",
+       "e    140\n",
+       "f     83\n",
+       "dtype: int32"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 多层索引，行列\n",
+    "# 单层索引\n",
+    "s = Series(np.random.randint(0,150,size = 6),index=list('abcdef'))\n",
+    "s"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "张三  期中    114\n",
+       "    期末    131\n",
+       "李四  期中      3\n",
+       "    期末     63\n",
+       "王五  期中    107\n",
+       "    期末     34\n",
+       "dtype: int32"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 多层索引，两层，三层以上（规则一样）\n",
+    "s2 = Series(np.random.randint(0,150,size = 6),index = pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]))\n",
+    "s2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>Python</th>\n",
+       "      <th>En</th>\n",
+       "      <th>Math</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">张三</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>73</td>\n",
+       "      <td>5</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>37</td>\n",
+       "      <td>36</td>\n",
+       "      <td>56</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">李四</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>149</td>\n",
+       "      <td>81</td>\n",
+       "      <td>142</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>71</td>\n",
+       "      <td>138</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">王五</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>11</td>\n",
+       "      <td>94</td>\n",
+       "      <td>103</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>25</td>\n",
+       "      <td>121</td>\n",
+       "      <td>83</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Python   En  Math\n",
+       "张三 期中      73    5    25\n",
+       "   期末      37   36    56\n",
+       "李四 期中     149   81   142\n",
+       "   期末      71  138     0\n",
+       "王五 期中      11   94   103\n",
+       "   期末      25  121    83"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = DataFrame(np.random.randint(0,150,size = (6,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]) )\n",
+    "\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>Python</th>\n",
+       "      <th>En</th>\n",
+       "      <th>Math</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"4\" valign=\"top\">张三</th>\n",
+       "      <th rowspan=\"2\" valign=\"top\">期中</th>\n",
+       "      <th>A</th>\n",
+       "      <td>15</td>\n",
+       "      <td>31</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>82</td>\n",
+       "      <td>56</td>\n",
+       "      <td>123</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">期末</th>\n",
+       "      <th>A</th>\n",
+       "      <td>14</td>\n",
+       "      <td>2</td>\n",
+       "      <td>78</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>69</td>\n",
+       "      <td>50</td>\n",
+       "      <td>17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"4\" valign=\"top\">李四</th>\n",
+       "      <th rowspan=\"2\" valign=\"top\">期中</th>\n",
+       "      <th>A</th>\n",
+       "      <td>91</td>\n",
+       "      <td>87</td>\n",
+       "      <td>143</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>120</td>\n",
+       "      <td>118</td>\n",
+       "      <td>39</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">期末</th>\n",
+       "      <th>A</th>\n",
+       "      <td>56</td>\n",
+       "      <td>76</td>\n",
+       "      <td>55</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>11</td>\n",
+       "      <td>105</td>\n",
+       "      <td>121</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"4\" valign=\"top\">王五</th>\n",
+       "      <th rowspan=\"2\" valign=\"top\">期中</th>\n",
+       "      <th>A</th>\n",
+       "      <td>147</td>\n",
+       "      <td>78</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>128</td>\n",
+       "      <td>126</td>\n",
+       "      <td>146</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">期末</th>\n",
+       "      <th>A</th>\n",
+       "      <td>49</td>\n",
+       "      <td>45</td>\n",
+       "      <td>114</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B</th>\n",
+       "      <td>121</td>\n",
+       "      <td>26</td>\n",
+       "      <td>77</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "         Python   En  Math\n",
+       "张三 期中 A      15   31    17\n",
+       "      B      82   56   123\n",
+       "   期末 A      14    2    78\n",
+       "      B      69   50    17\n",
+       "李四 期中 A      91   87   143\n",
+       "      B     120  118    39\n",
+       "   期末 A      56   76    55\n",
+       "      B      11  105   121\n",
+       "王五 期中 A     147   78     1\n",
+       "      B     128  126   146\n",
+       "   期末 A      49   45   114\n",
+       "      B     121   26    77"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 三层索引\n",
+    "df3 = DataFrame(np.random.randint(0,150,size = (12,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末'],['A','B']]) )\n",
+    "\n",
+    "df3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "73"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 先获取列后获取行\n",
+    "df['Python']['张三']['期中']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df2 = df.copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>Python</th>\n",
+       "      <th>En</th>\n",
+       "      <th>Math</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">张三</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>73</td>\n",
+       "      <td>5</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>37</td>\n",
+       "      <td>36</td>\n",
+       "      <td>56</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">李四</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>149</td>\n",
+       "      <td>81</td>\n",
+       "      <td>142</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>71</td>\n",
+       "      <td>138</td>\n",
+       "      <td>0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">王五</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>11</td>\n",
+       "      <td>94</td>\n",
+       "      <td>103</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>25</td>\n",
+       "      <td>121</td>\n",
+       "      <td>83</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Python   En  Math\n",
+       "张三 期中      73    5    25\n",
+       "   期末      37   36    56\n",
+       "李四 期中     149   81   142\n",
+       "   期末      71  138     0\n",
+       "王五 期中      11   94   103\n",
+       "   期末      25  121    83"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2.sort_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "73"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 先获取行，后获取列\n",
+    "df.loc['张三'].loc['期中']['Python']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>Python</th>\n",
+       "      <th>En</th>\n",
+       "      <th>Math</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">张三</th>\n",
+       "      <th>期中</th>\n",
+       "      <td>73</td>\n",
+       "      <td>5</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>期末</th>\n",
+       "      <td>37</td>\n",
+       "      <td>36</td>\n",
+       "      <td>56</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       Python  En  Math\n",
+       "张三 期中      73   5    25\n",
+       "   期末      37  36    56"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.iloc[[0,1]]"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Day76-90/code/5-pandas多层索引计算.ipynb
+++ b/Day76-90/code/5-pandas多层索引计算.ipynb
--- a/Day76-90/code/6-pandas数据集成.ipynb
+++ b/Day76-90/code/6-pandas数据集成.ipynb
--- a/Day76-90/code/7-pandas数据集成merge.ipynb
+++ b/Day76-90/code/7-pandas数据集成merge.ipynb
--- a/Day76-90/code/8-pandas分组聚合操作.ipynb
+++ b/Day76-90/code/8-pandas分组聚合操作.ipynb
@ -0,0 +1,877 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 分组聚合透视\n",
+    "# 很多时候属性是相似的\n",
+    "\n",
+    "import numpy as np\n",
+    "\n",
+    "import pandas as pd\n",
+    "\n",
+    "from pandas import Series,DataFrame"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Hand</th>\n",
+       "      <th>Smoke</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>IQ</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>right</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>male</td>\n",
+       "      <td>80</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>left</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>female</td>\n",
+       "      <td>50</td>\n",
+       "      <td>120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>left</td>\n",
+       "      <td>no</td>\n",
+       "      <td>female</td>\n",
+       "      <td>48</td>\n",
+       "      <td>90</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>male</td>\n",
+       "      <td>75</td>\n",
+       "      <td>130</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>right</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>male</td>\n",
+       "      <td>68</td>\n",
+       "      <td>140</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>male</td>\n",
+       "      <td>100</td>\n",
+       "      <td>80</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>female</td>\n",
+       "      <td>40</td>\n",
+       "      <td>94</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>female</td>\n",
+       "      <td>90</td>\n",
+       "      <td>110</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>left</td>\n",
+       "      <td>no</td>\n",
+       "      <td>male</td>\n",
+       "      <td>88</td>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>right</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>female</td>\n",
+       "      <td>76</td>\n",
+       "      <td>160</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    Hand Smoke     sex  weight   IQ\n",
+       "0  right   yes    male      80  100\n",
+       "1   left   yes  female      50  120\n",
+       "2   left    no  female      48   90\n",
+       "3  right    no    male      75  130\n",
+       "4  right   yes    male      68  140\n",
+       "5  right    no    male     100   80\n",
+       "6  right    no  female      40   94\n",
+       "7  right    no  female      90  110\n",
+       "8   left    no    male      88  100\n",
+       "9  right   yes  female      76  160"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# 左右手习惯、是否抽烟、性别，对体重和智商有一定影响\n",
+    "\n",
+    "df = DataFrame({'Hand':['right','left','left','right','right','right','right','right','left','right'],\n",
+    "               'Smoke':['yes','yes','no','no','yes','no','no','no','no','yes'],\n",
+    "               'sex':['male','female','female','male','male','male','female','female','male','female'],\n",
+    "               'weight':[80,50,48,75,68,100,40,90,88,76],\n",
+    "               'IQ':[100,120,90,130,140,80,94,110,100,160]})\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 分组聚合：查看某一条件下的规律"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>weight</th>\n",
+       "      <th>IQ</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hand</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>left</th>\n",
+       "      <td>62.0</td>\n",
+       "      <td>103.3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>right</th>\n",
+       "      <td>75.6</td>\n",
+       "      <td>116.3</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       weight     IQ\n",
+       "Hand                \n",
+       "left     62.0  103.3\n",
+       "right    75.6  116.3"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = df.groupby(by = ['Hand'])[['weight','IQ']].mean().round(1)\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>weight</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hand</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>left</th>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>right</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       weight\n",
+       "Hand         \n",
+       "left     62.0\n",
+       "right    75.6"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.groupby(by = ['Hand'])[['weight']].apply(np.mean).round(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df2 = df.groupby(by = ['Hand'])[['weight']].transform(np.mean).round(1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>weight_mean</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   weight_mean\n",
+       "0         75.6\n",
+       "1         62.0\n",
+       "2         62.0\n",
+       "3         75.6\n",
+       "4         75.6\n",
+       "5         75.6\n",
+       "6         75.6\n",
+       "7         75.6\n",
+       "8         62.0\n",
+       "9         75.6"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df2 = df2.add_suffix('_mean')\n",
+    "df2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Hand</th>\n",
+       "      <th>Smoke</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>weight</th>\n",
+       "      <th>IQ</th>\n",
+       "      <th>weight_mean</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>right</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>male</td>\n",
+       "      <td>80</td>\n",
+       "      <td>100</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>left</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>female</td>\n",
+       "      <td>50</td>\n",
+       "      <td>120</td>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>left</td>\n",
+       "      <td>no</td>\n",
+       "      <td>female</td>\n",
+       "      <td>48</td>\n",
+       "      <td>90</td>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>male</td>\n",
+       "      <td>75</td>\n",
+       "      <td>130</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>right</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>male</td>\n",
+       "      <td>68</td>\n",
+       "      <td>140</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>male</td>\n",
+       "      <td>100</td>\n",
+       "      <td>80</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>female</td>\n",
+       "      <td>40</td>\n",
+       "      <td>94</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>right</td>\n",
+       "      <td>no</td>\n",
+       "      <td>female</td>\n",
+       "      <td>90</td>\n",
+       "      <td>110</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>left</td>\n",
+       "      <td>no</td>\n",
+       "      <td>male</td>\n",
+       "      <td>88</td>\n",
+       "      <td>100</td>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>right</td>\n",
+       "      <td>yes</td>\n",
+       "      <td>female</td>\n",
+       "      <td>76</td>\n",
+       "      <td>160</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    Hand Smoke     sex  weight   IQ  weight_mean\n",
+       "0  right   yes    male      80  100         75.6\n",
+       "1   left   yes  female      50  120         62.0\n",
+       "2   left    no  female      48   90         62.0\n",
+       "3  right    no    male      75  130         75.6\n",
+       "4  right   yes    male      68  140         75.6\n",
+       "5  right    no    male     100   80         75.6\n",
+       "6  right    no  female      40   94         75.6\n",
+       "7  right    no  female      90  110         75.6\n",
+       "8   left    no    male      88  100         62.0\n",
+       "9  right   yes  female      76  160         75.6"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df3 = df.merge(df2,left_index=True,right_index=True)\n",
+    "df3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Hand\n",
+       "left     ([3, 3], [62.0, 103.3])\n",
+       "right    ([7, 7], [75.6, 116.3])\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def count(x):\n",
+    "    \n",
+    "    return (x.count(),x.mean().round(1))\n",
+    "\n",
+    "df.groupby(by = ['Hand'])[['weight','IQ']].apply(count)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th>IQ</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hand</th>\n",
+       "      <th>sex</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">left</th>\n",
+       "      <th>female</th>\n",
+       "      <td>120</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>male</th>\n",
+       "      <td>100</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th rowspan=\"2\" valign=\"top\">right</th>\n",
+       "      <th>female</th>\n",
+       "      <td>160</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>male</th>\n",
+       "      <td>140</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "               IQ\n",
+       "Hand  sex        \n",
+       "left  female  120\n",
+       "      male    100\n",
+       "right female  160\n",
+       "      male    140"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.groupby(by = ['Hand','sex'])[['IQ']].max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000019E24051EF0>"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = df.groupby(by = ['Hand'])['IQ','weight']\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"2\" halign=\"left\">IQ</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">weight</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th>max</th>\n",
+       "      <th>mean</th>\n",
+       "      <th>max</th>\n",
+       "      <th>mean</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hand</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>left</th>\n",
+       "      <td>120</td>\n",
+       "      <td>103.3</td>\n",
+       "      <td>88</td>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>right</th>\n",
+       "      <td>160</td>\n",
+       "      <td>116.3</td>\n",
+       "      <td>100</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        IQ        weight      \n",
+       "       max   mean    max  mean\n",
+       "Hand                          \n",
+       "left   120  103.3     88  62.0\n",
+       "right  160  116.3    100  75.6"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.agg(['max','mean']).round(1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>IQ</th>\n",
+       "      <th>weight</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Hand</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>left</th>\n",
+       "      <td>120</td>\n",
+       "      <td>62.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>right</th>\n",
+       "      <td>160</td>\n",
+       "      <td>75.6</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        IQ  weight\n",
+       "Hand              \n",
+       "left   120    62.0\n",
+       "right  160    75.6"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.agg({'IQ':'max','weight':'mean'}).round(1)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/Day76-90/code/9-pandas数据集成实战.ipynb
+++ b/Day76-90/code/9-pandas数据集成实战.ipynb
--- a/Day76-90/code/cancer_predict.npy
+++ b/Day76-90/code/cancer_predict.npy
--- a/Day76-90/code/cancer_true.npy
+++ b/Day76-90/code/cancer_true.npy
--- a/Day76-90/code/state-abbrevs.csv
+++ b/Day76-90/code/state-abbrevs.csv
@ -0,0 +1,52 @@
+"state","abbreviation"
+"Alabama","AL"
+"Alaska","AK"
+"Arizona","AZ"
+"Arkansas","AR"
+"California","CA"
+"Colorado","CO"
+"Connecticut","CT"
+"Delaware","DE"
+"District of Columbia","DC"
+"Florida","FL"
+"Georgia","GA"
+"Hawaii","HI"
+"Idaho","ID"
+"Illinois","IL"
+"Indiana","IN"
+"Iowa","IA"
+"Kansas","KS"
+"Kentucky","KY"
+"Louisiana","LA"
+"Maine","ME"
+"Montana","MT"
+"Nebraska","NE"
+"Nevada","NV"
+"New Hampshire","NH"
+"New Jersey","NJ"
+"New Mexico","NM"
+"New York","NY"
+"North Carolina","NC"
+"North Dakota","ND"
+"Ohio","OH"
+"Oklahoma","OK"
+"Oregon","OR"
+"Maryland","MD"
+"Massachusetts","MA"
+"Michigan","MI"
+"Minnesota","MN"
+"Mississippi","MS"
+"Missouri","MO"
+"Pennsylvania","PA"
+"Rhode Island","RI"
+"South Carolina","SC"
+"South Dakota","SD"
+"Tennessee","TN"
+"Texas","TX"
+"Utah","UT"
+"Vermont","VT"
+"Virginia","VA"
+"Washington","WA"
+"West Virginia","WV"
+"Wisconsin","WI"
+"Wyoming","WY"
--- a/Day76-90/code/state-areas.csv
+++ b/Day76-90/code/state-areas.csv
@ -0,0 +1,53 @@
+state,area (sq. mi)
+Alabama,52423
+Alaska,656425
+Arizona,114006
+Arkansas,53182
+California,163707
+Colorado,104100
+Connecticut,5544
+Delaware,1954
+Florida,65758
+Georgia,59441
+Hawaii,10932
+Idaho,83574
+Illinois,57918
+Indiana,36420
+Iowa,56276
+Kansas,82282
+Kentucky,40411
+Louisiana,51843
+Maine,35387
+Maryland,12407
+Massachusetts,10555
+Michigan,96810
+Minnesota,86943
+Mississippi,48434
+Missouri,69709
+Montana,147046
+Nebraska,77358
+Nevada,110567
+New Hampshire,9351
+New Jersey,8722
+New Mexico,121593
+New York,54475
+North Carolina,53821
+North Dakota,70704
+Ohio,44828
+Oklahoma,69903
+Oregon,98386
+Pennsylvania,46058
+Rhode Island,1545
+South Carolina,32007
+South Dakota,77121
+Tennessee,42146
+Texas,268601
+Utah,84904
+Vermont,9615
+Virginia,42769
+Washington,71303
+West Virginia,24231
+Wisconsin,65503
+Wyoming,97818
+District of Columbia,68
+Puerto Rico,3515
--- a/Day76-90/code/state-population.csv
+++ b/Day76-90/code/state-population.csv