'pandas使用'

This commit is contained in:
softpo 2019-05-16 11:59:06 +08:00
parent aa359771ff
commit 43c147dad1
15 changed files with 22810 additions and 0 deletions

View File

@ -1,2 +1,25 @@
## Pandas的应用
### 1、pandas入门
### 2、pandas索引
### 3、pandas数据清洗之空数据
[数据挖掘之空数据处理(有史以来最全面)](https://blog.csdn.net/Soft_Po/article/details/89302887)
### 4、pandas多层索引
### 5、pandas多层索引计算
### 6、pandas数据集成concat
### 7、pandas数据集成merge
### 8、pandas分组聚合操作
### 9、pandas数据集成实战
### 10、美国大选项目
[2012美国大选政治献金项目数据分析有史以来最全面](https://blog.csdn.net/Soft_Po/article/details/89283382)

View File

@ -0,0 +1,631 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"from pandas import Series,DataFrame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"Math 120\n",
"Python 136\n",
"En 128\n",
"Chinese 99\n",
"dtype: int64"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 创建\n",
"# Series是一维的数据\n",
"s = Series(data = [120,136,128,99],index = ['Math','Python','En','Chinese'])\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(4,)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.shape"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([120, 136, 128, 99], dtype=int64)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"v = s.values\n",
"v"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"numpy.ndarray"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(v)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"120.75"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.mean()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"136"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.max()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"15.903353943953666"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.std()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"Math 14400\n",
"Python 18496\n",
"En 16384\n",
"Chinese 9801\n",
"dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"s.pow(2)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>113</td>\n",
" <td>116</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>19</td>\n",
" <td>145</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>57</td>\n",
" <td>107</td>\n",
" <td>113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>95</td>\n",
" <td>3</td>\n",
" <td>66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>e</th>\n",
" <td>28</td>\n",
" <td>121</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>f</th>\n",
" <td>141</td>\n",
" <td>85</td>\n",
" <td>132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h</th>\n",
" <td>124</td>\n",
" <td>39</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>i</th>\n",
" <td>80</td>\n",
" <td>35</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>j</th>\n",
" <td>68</td>\n",
" <td>99</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>k</th>\n",
" <td>74</td>\n",
" <td>12</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"a 113 116 75\n",
"b 19 145 23\n",
"c 57 107 113\n",
"d 95 3 66\n",
"e 28 121 120\n",
"f 141 85 132\n",
"h 124 39 10\n",
"i 80 35 17\n",
"j 68 99 31\n",
"k 74 12 11"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame是二维的数据\n",
"# 和excel非常相似\n",
"# 所有进行数据分析,数据挖掘的工具最基础的结构:行和列,行表示样本,列表示的是属性\n",
"df = DataFrame(data = np.random.randint(0,150,size = (10,3)),index = list('abcdefhijk'),columns=['Python','En','Math'])\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"(10, 3)"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[113, 116, 75],\n",
" [ 19, 145, 23],\n",
" [ 57, 107, 113],\n",
" [ 95, 3, 66],\n",
" [ 28, 121, 120],\n",
" [141, 85, 132],\n",
" [124, 39, 10],\n",
" [ 80, 35, 17],\n",
" [ 68, 99, 31],\n",
" [ 74, 12, 11]])"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"v = df.values\n",
"v"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Python 79.9\n",
"En 76.2\n",
"Math 59.8\n",
"dtype: float64"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.mean()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Python 141\n",
"En 145\n",
"Math 132\n",
"dtype: int32"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.max()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>a</th>\n",
" <td>113</td>\n",
" <td>116</td>\n",
" <td>75</td>\n",
" </tr>\n",
" <tr>\n",
" <th>b</th>\n",
" <td>19</td>\n",
" <td>145</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>c</th>\n",
" <td>57</td>\n",
" <td>107</td>\n",
" <td>113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>d</th>\n",
" <td>95</td>\n",
" <td>3</td>\n",
" <td>66</td>\n",
" </tr>\n",
" <tr>\n",
" <th>e</th>\n",
" <td>28</td>\n",
" <td>121</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>f</th>\n",
" <td>141</td>\n",
" <td>85</td>\n",
" <td>132</td>\n",
" </tr>\n",
" <tr>\n",
" <th>h</th>\n",
" <td>124</td>\n",
" <td>39</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>i</th>\n",
" <td>80</td>\n",
" <td>35</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>j</th>\n",
" <td>68</td>\n",
" <td>99</td>\n",
" <td>31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>k</th>\n",
" <td>74</td>\n",
" <td>12</td>\n",
" <td>11</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"a 113 116 75\n",
"b 19 145 23\n",
"c 57 107 113\n",
"d 95 3 66\n",
"e 28 121 120\n",
"f 141 85 132\n",
"h 124 39 10\n",
"i 80 35 17\n",
"j 68 99 31\n",
"k 74 12 11"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Python 79.9\n",
"En 76.2\n",
"Math 59.8\n",
"dtype: float64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.mean(axis = 0)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"a 101.333333\n",
"b 62.333333\n",
"c 92.333333\n",
"d 54.666667\n",
"e 89.666667\n",
"f 119.333333\n",
"h 57.666667\n",
"i 44.000000\n",
"j 66.000000\n",
"k 32.333333\n",
"dtype: float64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.mean(axis = 1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,568 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"# 数据分析BI-------->人工智能AI\n",
"# 数据分析和数据挖掘一个意思,\n",
"# 工具和软件Excel 免费版\n",
"# SPSS一人一年10000、SAS一人一年5000、Matlab 收费\n",
"# R、Python全方位语言流行 免费\n",
"# Python + numpy + scipy + pandas + matplotlib + seaborn + pyEcharts + sklearn + keras + Tensorflow + …… \n",
"# 代码,自动化(数据输入----输出结果)\n",
"from pandas import Series,DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"a 63\n",
"b 107\n",
"c 16\n",
"d 35\n",
"e 140\n",
"f 83\n",
"dtype: int32"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 多层索引,行列\n",
"# 单层索引\n",
"s = Series(np.random.randint(0,150,size = 6),index=list('abcdef'))\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"张三 期中 114\n",
" 期末 131\n",
"李四 期中 3\n",
" 期末 63\n",
"王五 期中 107\n",
" 期末 34\n",
"dtype: int32"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 多层索引,两层,三层以上(规则一样)\n",
"s2 = Series(np.random.randint(0,150,size = 6),index = pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]))\n",
"s2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">张三</th>\n",
" <th>期中</th>\n",
" <td>73</td>\n",
" <td>5</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>37</td>\n",
" <td>36</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">李四</th>\n",
" <th>期中</th>\n",
" <td>149</td>\n",
" <td>81</td>\n",
" <td>142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>71</td>\n",
" <td>138</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">王五</th>\n",
" <th>期中</th>\n",
" <td>11</td>\n",
" <td>94</td>\n",
" <td>103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>25</td>\n",
" <td>121</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 73 5 25\n",
" 期末 37 36 56\n",
"李四 期中 149 81 142\n",
" 期末 71 138 0\n",
"王五 期中 11 94 103\n",
" 期末 25 121 83"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = DataFrame(np.random.randint(0,150,size = (6,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]) )\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">张三</th>\n",
" <th rowspan=\"2\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>15</td>\n",
" <td>31</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>82</td>\n",
" <td>56</td>\n",
" <td>123</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>14</td>\n",
" <td>2</td>\n",
" <td>78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>69</td>\n",
" <td>50</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">李四</th>\n",
" <th rowspan=\"2\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>91</td>\n",
" <td>87</td>\n",
" <td>143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>120</td>\n",
" <td>118</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>56</td>\n",
" <td>76</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>11</td>\n",
" <td>105</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">王五</th>\n",
" <th rowspan=\"2\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>147</td>\n",
" <td>78</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>128</td>\n",
" <td>126</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>49</td>\n",
" <td>45</td>\n",
" <td>114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>121</td>\n",
" <td>26</td>\n",
" <td>77</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 A 15 31 17\n",
" B 82 56 123\n",
" 期末 A 14 2 78\n",
" B 69 50 17\n",
"李四 期中 A 91 87 143\n",
" B 120 118 39\n",
" 期末 A 56 76 55\n",
" B 11 105 121\n",
"王五 期中 A 147 78 1\n",
" B 128 126 146\n",
" 期末 A 49 45 114\n",
" B 121 26 77"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 三层索引\n",
"df3 = DataFrame(np.random.randint(0,150,size = (12,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末'],['A','B']]) )\n",
"\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"73"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 先获取列后获取行\n",
"df['Python']['张三']['期中']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df2 = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">张三</th>\n",
" <th>期中</th>\n",
" <td>73</td>\n",
" <td>5</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>37</td>\n",
" <td>36</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">李四</th>\n",
" <th>期中</th>\n",
" <td>149</td>\n",
" <td>81</td>\n",
" <td>142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>71</td>\n",
" <td>138</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">王五</th>\n",
" <th>期中</th>\n",
" <td>11</td>\n",
" <td>94</td>\n",
" <td>103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>25</td>\n",
" <td>121</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 73 5 25\n",
" 期末 37 36 56\n",
"李四 期中 149 81 142\n",
" 期末 71 138 0\n",
"王五 期中 11 94 103\n",
" 期末 25 121 83"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"73"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 先获取行,后获取列\n",
"df.loc['张三'].loc['期中']['Python']"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">张三</th>\n",
" <th>期中</th>\n",
" <td>73</td>\n",
" <td>5</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>37</td>\n",
" <td>36</td>\n",
" <td>56</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 73 5 25\n",
" 期末 37 36 56"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[[0,1]]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,877 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# 分组聚合透视\n",
"# 很多时候属性是相似的\n",
"\n",
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"\n",
"from pandas import Series,DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Hand</th>\n",
" <th>Smoke</th>\n",
" <th>sex</th>\n",
" <th>weight</th>\n",
" <th>IQ</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>right</td>\n",
" <td>yes</td>\n",
" <td>male</td>\n",
" <td>80</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>left</td>\n",
" <td>yes</td>\n",
" <td>female</td>\n",
" <td>50</td>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>left</td>\n",
" <td>no</td>\n",
" <td>female</td>\n",
" <td>48</td>\n",
" <td>90</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>male</td>\n",
" <td>75</td>\n",
" <td>130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>right</td>\n",
" <td>yes</td>\n",
" <td>male</td>\n",
" <td>68</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>male</td>\n",
" <td>100</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>female</td>\n",
" <td>40</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>female</td>\n",
" <td>90</td>\n",
" <td>110</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>left</td>\n",
" <td>no</td>\n",
" <td>male</td>\n",
" <td>88</td>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>right</td>\n",
" <td>yes</td>\n",
" <td>female</td>\n",
" <td>76</td>\n",
" <td>160</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Hand Smoke sex weight IQ\n",
"0 right yes male 80 100\n",
"1 left yes female 50 120\n",
"2 left no female 48 90\n",
"3 right no male 75 130\n",
"4 right yes male 68 140\n",
"5 right no male 100 80\n",
"6 right no female 40 94\n",
"7 right no female 90 110\n",
"8 left no male 88 100\n",
"9 right yes female 76 160"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 左右手习惯,是否抽烟,性别,对体重,智商,有一定影响\n",
"\n",
"df = DataFrame({'Hand':['right','left','left','right','right','right','right','right','left','right'],\n",
" 'Smoke':['yes','yes','no','no','yes','no','no','no','no','yes'],\n",
" 'sex':['male','female','female','male','male','male','female','female','male','female'],\n",
" 'weight':[80,50,48,75,68,100,40,90,88,76],\n",
" 'IQ':[100,120,90,130,140,80,94,110,100,160]})\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 分组聚合查看规律,某一条件下规律"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>weight</th>\n",
" <th>IQ</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Hand</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>left</th>\n",
" <td>62.0</td>\n",
" <td>103.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>right</th>\n",
" <td>75.6</td>\n",
" <td>116.3</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" weight IQ\n",
"Hand \n",
"left 62.0 103.3\n",
"right 75.6 116.3"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = df.groupby(by = ['Hand'])[['weight','IQ']].mean().round(1)\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>weight</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Hand</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>left</th>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>right</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" weight\n",
"Hand \n",
"left 62.0\n",
"right 75.6"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(by = ['Hand'])[['weight']].apply(np.mean).round(1)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df2 = df.groupby(by = ['Hand'])[['weight']].transform(np.mean).round(1)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>weight_mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>75.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" weight_mean\n",
"0 75.6\n",
"1 62.0\n",
"2 62.0\n",
"3 75.6\n",
"4 75.6\n",
"5 75.6\n",
"6 75.6\n",
"7 75.6\n",
"8 62.0\n",
"9 75.6"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = df2.add_suffix('_mean')\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Hand</th>\n",
" <th>Smoke</th>\n",
" <th>sex</th>\n",
" <th>weight</th>\n",
" <th>IQ</th>\n",
" <th>weight_mean</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>right</td>\n",
" <td>yes</td>\n",
" <td>male</td>\n",
" <td>80</td>\n",
" <td>100</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>left</td>\n",
" <td>yes</td>\n",
" <td>female</td>\n",
" <td>50</td>\n",
" <td>120</td>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>left</td>\n",
" <td>no</td>\n",
" <td>female</td>\n",
" <td>48</td>\n",
" <td>90</td>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>male</td>\n",
" <td>75</td>\n",
" <td>130</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>right</td>\n",
" <td>yes</td>\n",
" <td>male</td>\n",
" <td>68</td>\n",
" <td>140</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>male</td>\n",
" <td>100</td>\n",
" <td>80</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>female</td>\n",
" <td>40</td>\n",
" <td>94</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>right</td>\n",
" <td>no</td>\n",
" <td>female</td>\n",
" <td>90</td>\n",
" <td>110</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>left</td>\n",
" <td>no</td>\n",
" <td>male</td>\n",
" <td>88</td>\n",
" <td>100</td>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>right</td>\n",
" <td>yes</td>\n",
" <td>female</td>\n",
" <td>76</td>\n",
" <td>160</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Hand Smoke sex weight IQ weight_mean\n",
"0 right yes male 80 100 75.6\n",
"1 left yes female 50 120 62.0\n",
"2 left no female 48 90 62.0\n",
"3 right no male 75 130 75.6\n",
"4 right yes male 68 140 75.6\n",
"5 right no male 100 80 75.6\n",
"6 right no female 40 94 75.6\n",
"7 right no female 90 110 75.6\n",
"8 left no male 88 100 62.0\n",
"9 right yes female 76 160 75.6"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = df.merge(df2,left_index=True,right_index=True)\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"Hand\n",
"left ([3, 3], [62.0, 103.3])\n",
"right ([7, 7], [75.6, 116.3])\n",
"dtype: object"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def count(x):\n",
" \n",
" return (x.count(),x.mean().round(1))\n",
"\n",
"df.groupby(by = ['Hand'])[['weight','IQ']].apply(count)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>IQ</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Hand</th>\n",
" <th>sex</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">left</th>\n",
" <th>female</th>\n",
" <td>120</td>\n",
" </tr>\n",
" <tr>\n",
" <th>male</th>\n",
" <td>100</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">right</th>\n",
" <th>female</th>\n",
" <td>160</td>\n",
" </tr>\n",
" <tr>\n",
" <th>male</th>\n",
" <td>140</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IQ\n",
"Hand sex \n",
"left female 120\n",
" male 100\n",
"right female 160\n",
" male 140"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby(by = ['Hand','sex'])[['IQ']].max()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000019E24051EF0>"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = df.groupby(by = ['Hand'])['IQ','weight']\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"2\" halign=\"left\">IQ</th>\n",
" <th colspan=\"2\" halign=\"left\">weight</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>max</th>\n",
" <th>mean</th>\n",
" <th>max</th>\n",
" <th>mean</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Hand</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>left</th>\n",
" <td>120</td>\n",
" <td>103.3</td>\n",
" <td>88</td>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>right</th>\n",
" <td>160</td>\n",
" <td>116.3</td>\n",
" <td>100</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IQ weight \n",
" max mean max mean\n",
"Hand \n",
"left 120 103.3 88 62.0\n",
"right 160 116.3 100 75.6"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.agg(['max','mean']).round(1)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>IQ</th>\n",
" <th>weight</th>\n",
" </tr>\n",
" <tr>\n",
" <th>Hand</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>left</th>\n",
" <td>120</td>\n",
" <td>62.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>right</th>\n",
" <td>160</td>\n",
" <td>75.6</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" IQ weight\n",
"Hand \n",
"left 120 62.0\n",
"right 160 75.6"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.agg({'IQ':'max','weight':'mean'}).round(1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,52 @@
"state","abbreviation"
"Alabama","AL"
"Alaska","AK"
"Arizona","AZ"
"Arkansas","AR"
"California","CA"
"Colorado","CO"
"Connecticut","CT"
"Delaware","DE"
"District of Columbia","DC"
"Florida","FL"
"Georgia","GA"
"Hawaii","HI"
"Idaho","ID"
"Illinois","IL"
"Indiana","IN"
"Iowa","IA"
"Kansas","KS"
"Kentucky","KY"
"Louisiana","LA"
"Maine","ME"
"Montana","MT"
"Nebraska","NE"
"Nevada","NV"
"New Hampshire","NH"
"New Jersey","NJ"
"New Mexico","NM"
"New York","NY"
"North Carolina","NC"
"North Dakota","ND"
"Ohio","OH"
"Oklahoma","OK"
"Oregon","OR"
"Maryland","MD"
"Massachusetts","MA"
"Michigan","MI"
"Minnesota","MN"
"Mississippi","MS"
"Missouri","MO"
"Pennsylvania","PA"
"Rhode Island","RI"
"South Carolina","SC"
"South Dakota","SD"
"Tennessee","TN"
"Texas","TX"
"Utah","UT"
"Vermont","VT"
"Virginia","VA"
"Washington","WA"
"West Virginia","WV"
"Wisconsin","WI"
"Wyoming","WY"
1 state abbreviation
2 Alabama AL
3 Alaska AK
4 Arizona AZ
5 Arkansas AR
6 California CA
7 Colorado CO
8 Connecticut CT
9 Delaware DE
10 District of Columbia DC
11 Florida FL
12 Georgia GA
13 Hawaii HI
14 Idaho ID
15 Illinois IL
16 Indiana IN
17 Iowa IA
18 Kansas KS
19 Kentucky KY
20 Louisiana LA
21 Maine ME
22 Montana MT
23 Nebraska NE
24 Nevada NV
25 New Hampshire NH
26 New Jersey NJ
27 New Mexico NM
28 New York NY
29 North Carolina NC
30 North Dakota ND
31 Ohio OH
32 Oklahoma OK
33 Oregon OR
34 Maryland MD
35 Massachusetts MA
36 Michigan MI
37 Minnesota MN
38 Mississippi MS
39 Missouri MO
40 Pennsylvania PA
41 Rhode Island RI
42 South Carolina SC
43 South Dakota SD
44 Tennessee TN
45 Texas TX
46 Utah UT
47 Vermont VT
48 Virginia VA
49 Washington WA
50 West Virginia WV
51 Wisconsin WI
52 Wyoming WY

View File

@ -0,0 +1,53 @@
state,area (sq. mi)
Alabama,52423
Alaska,656425
Arizona,114006
Arkansas,53182
California,163707
Colorado,104100
Connecticut,5544
Delaware,1954
Florida,65758
Georgia,59441
Hawaii,10932
Idaho,83574
Illinois,57918
Indiana,36420
Iowa,56276
Kansas,82282
Kentucky,40411
Louisiana,51843
Maine,35387
Maryland,12407
Massachusetts,10555
Michigan,96810
Minnesota,86943
Mississippi,48434
Missouri,69709
Montana,147046
Nebraska,77358
Nevada,110567
New Hampshire,9351
New Jersey,8722
New Mexico,121593
New York,54475
North Carolina,53821
North Dakota,70704
Ohio,44828
Oklahoma,69903
Oregon,98386
Pennsylvania,46058
Rhode Island,1545
South Carolina,32007
South Dakota,77121
Tennessee,42146
Texas,268601
Utah,84904
Vermont,9615
Virginia,42769
Washington,71303
West Virginia,24231
Wisconsin,65503
Wyoming,97818
District of Columbia,68
Puerto Rico,3515
1 state area (sq. mi)
2 Alabama 52423
3 Alaska 656425
4 Arizona 114006
5 Arkansas 53182
6 California 163707
7 Colorado 104100
8 Connecticut 5544
9 Delaware 1954
10 Florida 65758
11 Georgia 59441
12 Hawaii 10932
13 Idaho 83574
14 Illinois 57918
15 Indiana 36420
16 Iowa 56276
17 Kansas 82282
18 Kentucky 40411
19 Louisiana 51843
20 Maine 35387
21 Maryland 12407
22 Massachusetts 10555
23 Michigan 96810
24 Minnesota 86943
25 Mississippi 48434
26 Missouri 69709
27 Montana 147046
28 Nebraska 77358
29 Nevada 110567
30 New Hampshire 9351
31 New Jersey 8722
32 New Mexico 121593
33 New York 54475
34 North Carolina 53821
35 North Dakota 70704
36 Ohio 44828
37 Oklahoma 69903
38 Oregon 98386
39 Pennsylvania 46058
40 Rhode Island 1545
41 South Carolina 32007
42 South Dakota 77121
43 Tennessee 42146
44 Texas 268601
45 Utah 84904
46 Vermont 9615
47 Virginia 42769
48 Washington 71303
49 West Virginia 24231
50 Wisconsin 65503
51 Wyoming 97818
52 District of Columbia 68
53 Puerto Rico 3515

File diff suppressed because it is too large Load Diff