Python-100-Days/Day66-80/code/day06.ipynb

810 lines
23 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "markdown",
"id": "e4ad3216-d45c-4328-a509-3c01e0fec4d5",
"metadata": {},
"source": [
"## 深入浅出pandas"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5faacbe5-d44a-4e0e-a287-19270bc1a693",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.rcParams['font.sans-serif'].insert(0, 'SimHei')\n",
"plt.rcParams['axes.unicode_minus'] = False\n",
"get_ipython().run_line_magic('config', \"InlineBackend.figure_format = 'svg'\")"
]
},
{
"cell_type": "markdown",
"id": "9b6101ba-4d7b-408c-8f94-76cf789ab71a",
"metadata": {},
"source": [
"### 科比投篮数据分析\n",
"\n",
"1. 科比使用得最多的投篮动作\n",
"2. 科比交手次数最多的球队\n",
"3. 科比有出手的比赛有多少场\n",
"4. 科比职业生涯(常规赛+季后赛)总得分(不含罚篮)\n",
"5. 科比得分最高的五场比赛(对手、投篮次数、得分、命中率)\n",
"6. 科比得分最多的三个赛季(赛季、投篮次数、得分、命中率)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5008a34f-e108-4c35-a71e-0b28e2f4cbfc",
"metadata": {},
"outputs": [],
"source": [
"# 不限制最大显示的列数\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "526e6f01-7b18-487a-bd89-fdc5a5a494b0",
"metadata": {},
"outputs": [],
"source": [
"# 加载科比投篮数据\n",
"kobe_df = pd.read_csv('res/科比投篮数据.csv', index_col='shot_id')\n",
"kobe_df.tail(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "127b7ab9-568c-4982-8238-d5fcafe6e9b2",
"metadata": {},
"outputs": [],
"source": [
"kobe_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4cbe2e96-0bbe-43a7-bafc-f5f956090ea4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 科比使用得最多的投篮动作是什么\n",
"kobe_df.action_type.value_counts().index[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46bd394d-05f3-4adc-bc69-d8ef70e1f877",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"kobe_df.groupby('action_type')['action_type'].count().nlargest(1).index[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3ebb1eab-a675-4f2f-833b-ff7c4207ec77",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 科比交手次数最多的球队是哪支队伍\n",
"kobe_df.drop_duplicates('game_id').opponent.value_counts().index[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6b53491a-1e04-4684-94de-ddcf4eba0a9d",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"kobe_df.drop_duplicates('game_id').groupby('opponent').opponent.count().idxmax()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf1b9f70-23df-4103-87db-7727329eb679",
"metadata": {},
"outputs": [],
"source": [
"# 科比有出手的比赛有多少场\n",
"kobe_df.game_id.nunique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "91dfb7c3-c2d8-4eff-9df9-8be558e054d1",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 统计科比常规赛和季后赛的投篮命中率\n",
"temp = kobe_df.dropna().pivot_table(index=['playoffs', 'shot_type'], columns=['shot_made_flag'], values='game_id', aggfunc='count')\n",
"temp = temp.divide(temp.sum(axis=1), axis=0)\n",
"temp"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e998d2d7-1259-479d-b74e-6e9dccf8de36",
"metadata": {},
"outputs": [],
"source": [
"# 填充shot_made_flag字段的缺失值\n",
"def handle(x):\n",
" playoffs, shot_type, shot_made_flag = x\n",
" if np.isnan(shot_made_flag):\n",
" shot_made_flag = 1 if np.random.random() < temp.at[(playoffs, shot_type), 1.0] else 0\n",
" return shot_made_flag\n",
"\n",
"\n",
"kobe_df['shot_made_flag'] = kobe_df[['playoffs', 'shot_type', 'shot_made_flag']].apply(handle, axis=1).astype('?')\n",
"kobe_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1fd5dbf5-e89f-4f6c-9867-cca583d08527",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 处理得分字段\n",
"kobe_df['point'] = kobe_df.shot_type.str[0].astype('i8')\n",
"kobe_df['point'] = kobe_df[['shot_made_flag', 'point']].apply(lambda x: x.loc['point'] if x.loc['shot_made_flag'] else 0, axis=1)\n",
"kobe_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b431b8a2-600e-4ed2-8092-0ed8598043f4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 参考数据投篮命中数11719\n",
"kobe_df.shot_made_flag.sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2fdf20e-54d2-43b4-a5b2-52b6f7e815c4",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 参考数据不含罚篮的投篮得分25265\n",
"kobe_df.point.sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8929cfa5-2152-45ad-9ac4-ce9154ef8830",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 科比得分最多的赛季是哪个赛季和分数\n",
"kobe_df.groupby('season').point.sum().nlargest(3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d5bd3ccf-6654-4c84-a295-4142fd24cebe",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 获得得分最高的5场比赛的game_id\n",
"index = kobe_df.groupby('game_id').point.sum().nlargest(5).index.values\n",
"index"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e48b6dfb-c6f8-40c5-beda-a1e50ae742b4",
"metadata": {},
"outputs": [],
"source": [
"# 用布尔索引筛选数据\n",
"kobe_df[np.in1d(kobe_df.game_id, index)]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6ba3500-a50f-400c-a2bc-7f04cf0e3393",
"metadata": {},
"outputs": [],
"source": [
"# 用query方法筛选数据\n",
"kobe_df.query('game_id in @index')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "171a9b52-e917-4b41-9472-80532cc1a72e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 科比得分最高的五场比赛(对手、投篮次数、得分、命中率)\n",
"# 参考数据含罚篮TOR - 81分 / POR - 65分 / DAL - 62分 / NYK - 61分 / MEM - 60分 / UTA - 60分\n",
"df1 = kobe_df[np.in1d(kobe_df.game_id, index)].groupby(\n",
" 'game_id'\n",
")[['game_date', 'opponent', 'game_id', 'shot_made_flag', 'point']].agg({\n",
" 'game_date': 'max',\n",
" 'opponent': 'max',\n",
" 'game_id': 'count',\n",
" 'shot_made_flag': 'sum',\n",
" 'point': 'sum'\n",
"})\n",
"df1['rate'] = df1.shot_made_flag / df1.game_id\n",
"df1.drop(columns=['shot_made_flag'], inplace=True)\n",
"df1.reset_index(drop=True, inplace=True)\n",
"df1.set_index('game_date', inplace=True)\n",
"df1.rename(columns={'opponent': '对手', 'game_id': '出手次数', 'point': '得分', 'rate': '命中率'}, inplace=True)\n",
"df1.sort_values(by='得分', ascending=False).style.format(formatter={'命中率': '{:.2%}'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a129218e-4ceb-4004-9b91-5668b8d3940f",
"metadata": {},
"outputs": [],
"source": [
"df2 = kobe_df.query('game_id in @index').groupby(\n",
" 'game_id'\n",
")[['game_date', 'opponent', 'game_id', 'shot_made_flag', 'point']].agg({\n",
" 'game_date': 'max',\n",
" 'opponent': 'max',\n",
" 'game_id': 'count',\n",
" 'shot_made_flag': 'sum',\n",
" 'point': 'sum'\n",
"})\n",
"df2['rate'] = df2.shot_made_flag / df2.game_id\n",
"df2.drop(columns=['shot_made_flag'], inplace=True)\n",
"df2.reset_index(drop=True, inplace=True)\n",
"df2.set_index('game_date', inplace=True)\n",
"df2.rename(columns={'opponent': '对手', 'game_id': '出手次数', 'point': '得分', 'rate': '命中率'}, inplace=True)\n",
"df2.sort_values(by='得分', ascending=False).style.format(formatter={'命中率': '{:.2%}'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "473883f8-acfa-4d32-806d-4201c50106e1",
"metadata": {},
"outputs": [],
"source": [
"# 科比得分最多的三个赛季(赛季、投篮次数、得分、命中率)\n"
]
},
{
"cell_type": "markdown",
"id": "c1ae87f9-1698-48b8-be11-0edd47cae298",
"metadata": {},
"source": [
"### 深圳二手房数据分析\n",
"\n",
"1. 统计深圳二手房单价分布规律\n",
"2. 统计深圳二手房总价分布规律\n",
"3. 统计每个区总价和均价的均值\n",
"4. 深圳每个区单价Top3的商圈\n",
"5. 哪种户型的二手房数量最多\n",
"6. 总价Top10的二手房分布在哪些区"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "041ac635-9598-4a10-9e04-664678f23448",
"metadata": {},
"outputs": [],
"source": [
"sz_df = pd.read_csv('res/深圳二手房数据.csv')\n",
"sz_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7298d0f3-d9ef-4cee-975d-8d720711d749",
"metadata": {},
"outputs": [],
"source": [
"sz_df.drop(columns='Unnamed: 0', inplace=True)\n",
"sz_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f140da58-6846-4595-b365-359cdc5b7dd1",
"metadata": {},
"outputs": [],
"source": [
"# 修正列名\n",
"sz_df.rename(columns={'hourseType': 'house_type', 'hourseSize': 'house_size'}, inplace=True)\n",
"sz_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "731f00a4-1db8-4aac-ba7c-7bf65ebc3b40",
"metadata": {},
"outputs": [],
"source": [
"# 将tax字段处理为bool类型\n",
"sz_df['tax'] = sz_df.tax.fillna('').astype('?')\n",
"sz_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b2bad9d-ae47-478d-8be1-139121742012",
"metadata": {},
"outputs": [],
"source": [
"sz_df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a780b30-c8d8-4b40-b416-99b1d6572e26",
"metadata": {},
"outputs": [],
"source": [
"# 获取描述性统计信息\n",
"sz_df.total_price.agg(['mean', 'max', 'min', 'skew', 'kurt'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2b4a48c-9775-424f-bc10-4bc1d71b3813",
"metadata": {},
"outputs": [],
"source": [
"sz_df.unit_price.agg(['mean', 'max', 'min', 'skew', 'kurt'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c742fc0c-9b3c-4b8d-861a-879012677458",
"metadata": {},
"outputs": [],
"source": [
"sz_df.house_size.agg(['mean', 'max', 'min', 'skew', 'kurt'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c6a64d4-7bd8-4d04-b9a9-3d787ba9c732",
"metadata": {},
"outputs": [],
"source": [
"sz_df[sz_df.unit_price < 10000]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "22989374-3017-48a5-9802-61459df9f8d6",
"metadata": {},
"outputs": [],
"source": [
"# 删除异常数据单价小于10000\n",
"sz_df.drop(index=sz_df[sz_df.unit_price < 10000].index, inplace=True)\n",
"sz_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02a1b493-6d66-4e31-8298-bbe50eae9676",
"metadata": {},
"outputs": [],
"source": [
"# 删除面积在10平米以下200平米以上的房屋信息\n",
"sz_df.drop(index=sz_df.query('house_size < 10 or house_size > 200').index, inplace=True)\n",
"sz_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf37b2b5-1493-4a2b-a16c-3df39a99fcf2",
"metadata": {},
"outputs": [],
"source": [
"# 添加一个总房间数字段\n",
"sz_df['rooms_num'] = sz_df.house_type.str.extract('(\\d+)室(\\d+)厅').astype('i8').sum(axis=1)\n",
"sz_df.rooms_num.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f605b5bd-9ccf-4946-b084-c05db4b1f78b",
"metadata": {},
"outputs": [],
"source": [
"sz_df.tail(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f197094-8843-4575-bf87-8e6599d4dce9",
"metadata": {},
"outputs": [],
"source": [
"# 删除房间总数大于8个的房屋信息\n",
"sz_df.drop(index=sz_df[sz_df.rooms_num > 8].index, inplace=True)\n",
"sz_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "920d0e09-fa58-456a-9d3d-6329ef49ae67",
"metadata": {},
"outputs": [],
"source": [
"# 单价分布\n",
"sz_df.unit_price.plot(kind='hist', figsize=(9, 5), bins=15, ylabel='')\n",
"plt.xticks(np.arange(10000, 210001, 20000))\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2fd023d2-b7a9-456e-a58d-731512711484",
"metadata": {},
"outputs": [],
"source": [
"# 总价分布\n",
"sz_df.total_price.plot(kind='hist', figsize=(9, 5), bins=15, ylabel='')\n",
"plt.xticks(np.arange(100, 2901, 400))\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d1d68635-d7fa-44dc-97dc-8938cba1de80",
"metadata": {},
"outputs": [],
"source": [
"# 统计每个区总价和均价的均值\n",
"sz_df.pivot_table(\n",
" index='area',\n",
" values=['title', 'unit_price', 'total_price'],\n",
" aggfunc={'title': 'count', 'unit_price': 'mean', 'total_price': 'mean'}\n",
").round(1).sort_values(\n",
" 'unit_price', ascending=False\n",
").style.format(\n",
" formatter={\n",
" 'total_price': '¥{:.0f}万元',\n",
" 'unit_price': '¥{:,.0f}元'\n",
" }\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "538ebe18-432e-4951-8667-ce90cb73ae5e",
"metadata": {},
"outputs": [],
"source": [
"# 深圳每个区房屋平均单价Top3商圈\n",
"temp_df = sz_df.groupby(['area', 'position'])[['unit_price']].mean().round(1)\n",
"temp_df['rank'] = temp_df.unit_price.groupby('area').rank(method='dense', ascending=False).astype('i8')\n",
"temp_df.query('rank <= 3')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04aaf617-240d-4fc2-b702-fa3e03978347",
"metadata": {},
"outputs": [],
"source": [
"# 深圳每个区房屋平均单价Top3商圈\n",
"temp_df = sz_df.groupby(['area', 'position'], as_index=False)[['unit_price']].mean().round(1)\n",
"temp_df = temp_df.groupby('area')[['position', 'unit_price']].apply(lambda x: x.nlargest(3, 'unit_price'))\n",
"temp_df.style.hide(level=1).format(formatter={'unit_price': '¥{:,.0f}元'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38f91365-6b40-40a1-b8bd-f0fd4cac9d50",
"metadata": {},
"outputs": [],
"source": [
"# 哪种户型的二手房数量最多\n",
"sz_df.groupby('house_type').house_type.count().nlargest(1).index[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4db07743-51cd-4007-8faf-7aa5714167d9",
"metadata": {},
"outputs": [],
"source": [
"# 总价Top10的二手房分布在哪些区\n",
"top10 = sz_df.total_price.nlargest(10).index.values\n",
"# 通过花式索引获取对应的行\n",
"sz_df.loc[top10].groupby('area').area.count()"
]
},
{
"cell_type": "markdown",
"id": "870a64c4-3164-4b96-b910-fc80b5e44853",
"metadata": {},
"source": [
"### 销售利润下滑诊断分析"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1e63fa6-018e-4cea-9899-05bc4a4aaae2",
"metadata": {},
"outputs": [],
"source": [
"detail_df = pd.read_excel('res/商品销售明细表.xlsx', sheet_name='Sheet1')\n",
"outlet_df = pd.read_excel('res/门店信息维度表.xlsx', sheet_name='Sheet1')\n",
"commod_df = pd.read_excel('res/商品信息维度表.xlsx', sheet_name='Sheet1')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cb430f84-5628-4dc5-a63a-36520cfcf66a",
"metadata": {},
"outputs": [],
"source": [
"detail_df.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41a2f93a-3799-4253-8acd-6f3498283010",
"metadata": {},
"outputs": [],
"source": [
"detail_df.rename(columns={'日期(年月日)': '销售日期'}, inplace=True)\n",
"detail_df['销售日期'] = pd.to_datetime(detail_df.销售日期)\n",
"detail_df['月份'] = detail_df.销售日期.dt.month\n",
"detail_df['利润额'] = detail_df.销售额 - detail_df.成本额\n",
"detail_df.head(5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f94c5f3-c799-470f-a693-7d01afa4f88f",
"metadata": {},
"outputs": [],
"source": [
"temp1 = detail_df.groupby('月份')[['销售额', '利润额']].sum()\n",
"temp1['销售月环比'] = temp1.销售额.pct_change()\n",
"temp1['利润月环比'] = temp1.利润额.pct_change()\n",
"temp1[['销售额', '销售月环比', '利润额', '利润月环比']].style.format(\n",
" formatter={\n",
" '销售月环比': '{:.2%}',\n",
" '利润月环比': '{:.2%}',\n",
" },\n",
" na_rep='--------'\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56b4ab4a-aa35-49c5-9387-80b545799cfe",
"metadata": {},
"outputs": [],
"source": [
"temp1.plot(kind='line', figsize=(9, 5), xlabel='', y=['销售额', '利润额'], color=['navy', 'coral'], marker='o')\n",
"plt.ylim(0, 1.4e7)\n",
"plt.grid(axis='y', linestyle=':', alpha=0.5)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "00ca4494-b03d-4f42-890f-9d8088bb61e7",
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.ticker as tkr\n",
"\n",
"ax = temp1.销售额.plot(kind='line', figsize=(9, 5), marker='o', color='navy', linestyle='--')\n",
"temp1.利润额.plot(ax=ax, kind='line', marker='*', color='darkgreen', linestyle='--', xlabel='')\n",
"plt.ylim(0, 14000000)\n",
"plt.legend(loc='lower right')\n",
"\n",
"# 基于ax构建双胞胎坐标系共享横轴自己定制纵轴\n",
"ax2 = ax.twinx()\n",
"ax2.yaxis.set_major_formatter(tkr.PercentFormatter(xmax=1, decimals=0))\n",
"profs_rates = temp1.利润额 / temp1.销售额\n",
"profs_rates.plot(ax=ax2, kind='line', marker='^', color='r', linestyle=':', label='毛利率')\n",
"plt.ylim(0.45, 0.65)\n",
"plt.legend()\n",
"plt.grid(axis='y', linestyle=':', alpha=0.5)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "637679a6-f7e1-4de8-a17e-4541790aa99c",
"metadata": {},
"outputs": [],
"source": [
"# 事实表连接维度表\n",
"merged_df = pd.merge(detail_df, outlet_df, how='left', on='门店编码')\n",
"merged_df = pd.merge(merged_df, commod_df, how='left', on='商品编码')\n",
"august_df = merged_df.query('月份 == 8')\n",
"august_df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e49793fb-6149-4366-94f6-2546fd14065e",
"metadata": {},
"outputs": [],
"source": [
"temp_df2 = august_df.groupby('省份')[['销售额', '成本额']].sum()\n",
"temp_df2.nlargest(10, '成本额')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6959bb8b-a2e5-4948-bfc1-c78be4a3d1d9",
"metadata": {},
"outputs": [],
"source": [
"temp_df2.nlargest(10, '成本额').plot(kind='bar', figsize=(9, 5), xlabel='')\n",
"plt.xticks(rotation=0)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e62162ff-af16-4a8f-af56-32b64712f0a5",
"metadata": {},
"outputs": [],
"source": [
"temp_df3 = august_df.query('省份 == \"湖南省\"').groupby('城市')[['销售额', '成本额']].sum()\n",
"temp_df3['利润率'] = (temp_df3.销售额 - temp_df3.成本额) / temp_df3.销售额\n",
"temp_df3.nsmallest(3, '利润率').style.format(formatter={'利润率': '{:.2%}'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5626ff61-f8dd-4871-9ad3-28c25910d36b",
"metadata": {},
"outputs": [],
"source": [
"temp_df4 = august_df.query('省份 == \"湖南省\" and 城市 == \"长沙市\"').groupby('门店名称')[['销售额', '成本额']].sum()\n",
"temp_df4['利润率'] = (temp_df4.销售额 - temp_df4.成本额) / temp_df4.销售额\n",
"temp_df4.sort_values(by='利润率').style.format(formatter={'利润率': '{:.2%}'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4993f987-37ad-450c-971a-d54ff4f6bc29",
"metadata": {},
"outputs": [],
"source": [
"august_df = august_df.query('省份 == \"湖南省\" and 城市 == \"长沙市\" and 门店名称 == \"长沙梅溪湖店\"')\n",
"august_df.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04dfedf1-e7f1-4493-be54-a32b5e997e0d",
"metadata": {},
"outputs": [],
"source": [
"temp_df5 = august_df.groupby('商品类别')[['销售额', '成本额']].sum()\n",
"temp_df5['利润率'] = (temp_df5.销售额 - temp_df5.成本额) / temp_df5.销售额\n",
"temp_df5.sort_values(by='利润率').style.format(formatter={'利润率': '{:.2%}'})"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3fd3e504-3292-4bab-b3dd-f2f3629f6285",
"metadata": {},
"outputs": [],
"source": [
"temp_df6 = august_df.query('商品类别 == \"零食\"').groupby('商品名称')[['销售额', '成本额']].sum()\n",
"temp_df6['利润率'] = (temp_df6.销售额 - temp_df6.成本额) / temp_df6.销售额\n",
"temp_df6.sort_values(by='利润率').style.format(formatter={'利润率': '{:.2%}'})"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}