{
"cells": [
{
"cell_type": "markdown",
"id": "6fc07f67-318b-4d79-8d4e-4eb8a2c61be2",
"metadata": {},
"source": [
"## NumPy进阶"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "a9d74703-47d5-44f4-8566-eb7d5476c792",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.rcParams['font.sans-serif'].insert(0, 'SimHei')\n",
"plt.rcParams['axes.unicode_minus'] = False"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "d139c565-6bf2-4bf6-9d66-d2755b29d1db",
"metadata": {},
"outputs": [],
"source": [
"%config InlineBackend.figure_format = 'svg'\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"id": "d41a57e5-6009-455b-aff9-4f96682423fc",
"metadata": {},
"source": [
"### NumPy中的函数\n",
"\n",
"#### 通用一元函数"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5f881886-8aca-40cb-a9f3-4514e28b8fe3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1., 2., 3., inf, nan, -inf, nan, 5.])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# inf - infinity\n",
"# nan - not a number\n",
"array1 = np.array([1, 2, 3, np.inf, np.nan, -np.inf, np.nan, 5])\n",
"array1"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "b6e891cc-035c-4e98-9406-1e78e3623e76",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dtype('float64')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"array1.dtype"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "674995a2-e50a-45a6-b1ad-7a9f88331dd0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([False, False, False, False, True, False, True, False])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.isnan(array1)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "358641bc-510c-4f1b-9df7-5dd54de47978",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1., 2., 3., inf, -inf, 5.])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"array1[~np.isnan(array1)]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "20030d7d-822e-45c7-b962-3aaf706e133c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ True, True, True, False, False, False, False, True])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.isfinite(array1)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "357cc22a-7acf-46ee-9523-631134dc8eae",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1., 2., 3., 5.])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"array1[np.isfinite(array1)]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "c38f23a4-7d72-4ce4-9bc4-9f8cd9433fa8",
"metadata": {},
"outputs": [],
"source": [
"x = np.linspace(0.5, 10, 72)\n",
"y1 = np.sin(x)\n",
"y2 = np.log2(x)\n",
"y3 = np.sqrt(x)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "08a272bc-8765-455b-b0d0-700872071cf4",
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 定制画布\n",
"plt.figure(figsize=(8, 4))\n",
"# 绘制折线图\n",
"plt.plot(x, y1, marker='.', label='$y=sin(x)$')\n",
"plt.plot(x, y2, label='$y=log_{2}x$', linewidth=3, color='#9c9c9c')\n",
"plt.plot(x, y3, label='$y=\\sqrt{x}$', linestyle='-.', linewidth=0.5)\n",
"# 显示图例\n",
"plt.legend(loc='center right')\n",
"# 显示图表\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "0b292f56-dab7-469e-89ed-0fb2114902aa",
"metadata": {},
"source": [
"#### 通用二元函数"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8b67932a-481e-4e2d-9d83-00994a01d959",
"metadata": {},
"outputs": [],
"source": [
"array2 = np.array([0.1 + 0.2, 0.1 + 0.2 + 0.3])\n",
"array3 = np.array([0.3, 0.6])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "23581a64-7b02-4f3f-8a5f-ea49ec20e48a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([False, False])"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"array2 == array3"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "ddcb612c-c7aa-44c6-b8d3-1ee123fba534",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.False_"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.all(array2 == array3)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "40d454ad-8c60-4132-8dde-10726180e552",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 比较两个数组元素是否(几乎)完全相等 - 有误差容忍度\n",
"np.allclose(array2, array3)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f1aab287-9d50-4b32-94fa-26d7f183fde3",
"metadata": {},
"outputs": [],
"source": [
"array4 = np.array([1, 2, 3, 4, 5, 6])\n",
"array5 = np.array([2, 4, 6, 8, 10])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "ea25f2ad-e007-486b-a402-eb3436c9346c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 4, 6])"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 交集\n",
"np.intersect1d(array4, array5)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "e4d2116c-c895-4597-95dc-fda67a1c99a8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 2, 3, 4, 5, 6, 8, 10])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 并集\n",
"np.union1d(array4, array5)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "5348c4f2-4222-4904-bd07-a3c85e62e4c7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 3, 5])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 差集\n",
"np.setdiff1d(array4, array5)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "9cd3f3e5-a986-469f-97ba-c739aa4b8577",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 3, 5, 8, 10])"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 对称差\n",
"np.setxor1d(array4, array5)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "78eb3a98-5992-452e-ab13-eaf578cab7a0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([False, True, False, True, False, True])"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 成员运算\n",
"# np.in1d(array4, array5)\n",
"np.isin(array4, array5)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "c7bbc624-dfdc-41d9-bfa2-7a4f3a387bce",
"metadata": {},
"outputs": [],
"source": [
"# 杰卡德相似度\n",
"user_a = np.array(['平板电脑', '尿不湿', '手机', '键盘', '手机支架', '奶瓶', '婴儿辅食', '基围虾', '巴沙鱼', '生抽', '沙拉酱'])\n",
"user_b = np.array(['平板电脑', '键盘', '充电宝', '补光灯', '生抽', '散热器', '笔记本电脑', '双肩包', '登山杖', '露营帐篷', '睡袋'])\n",
"user_c = np.array(['沐浴露', '维C泡腾片', '牛奶', '尿不湿', '平板电脑', '奶瓶', '婴儿辅食', '手机', '磨牙棒', '生抽', '基围虾'])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "c4132979-1ef5-4e2b-93a4-2d6bfc66f38b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.15789473684210525"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.intersect1d(user_a, user_b).size / np.union1d(user_a, user_b).size"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "46dda506-908b-405a-8e9b-c14090da05b7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.4666666666666667"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.intersect1d(user_a, user_c).size / np.union1d(user_a, user_c).size"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fb5435f8-b8d4-4b37-88fb-13149a62660e",
"metadata": {},
"outputs": [],
"source": [
"np.setdiff1d(user_a, user_c)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8366b08-779d-4369-9f27-a9b4b9125782",
"metadata": {},
"outputs": [],
"source": [
"np.setdiff1d(user_c, user_a)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b1ce07b5-ec70-4512-814a-e210148ed205",
"metadata": {},
"outputs": [],
"source": [
"# 余弦相似度\n",
"user = np.array([5, 1, 3])\n",
"mov1 = np.array([4, 5, 1])\n",
"mov2 = np.array([5, 1, 5])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5ab70c0f-cfe7-4e10-b162-0feefb36f884",
"metadata": {},
"outputs": [],
"source": [
"# linear algebra\n",
"# np.dot - 点积\n",
"# np.linalg.norm - 模长\n",
"np.dot(user, mov1) / (np.linalg.norm(user) * np.linalg.norm(mov1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "21a1caaf-2cf4-4cf8-836c-d244f4133098",
"metadata": {},
"outputs": [],
"source": [
"# np.arccos - 反余弦函数 - 弧度\n",
"# np.degrees - 弧度换算角度\n",
"np.degrees(np.arccos(np.dot(user, mov1) / (np.linalg.norm(user) * np.linalg.norm(mov1))))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b90ecb1-67d0-48be-84c5-54bf212f1292",
"metadata": {},
"outputs": [],
"source": [
"np.degrees(np.arccos(np.dot(user, mov2) / (np.linalg.norm(user) * np.linalg.norm(mov2))))"
]
},
{
"cell_type": "markdown",
"id": "c81c6238-f28c-44e8-ac54-94a69f5a6c4a",
"metadata": {},
"source": [
"#### 其他常用函数"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1df2d086-58cd-4324-b2cf-98d8d971a4d7",
"metadata": {},
"outputs": [],
"source": [
"array6 = np.array([1, 2, 3, 1, 1, 2, 2, 4, 5, 7, 3, 6, 6])\n",
"array6"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70df0735-6680-4996-8cc3-10ac5d9102ab",
"metadata": {},
"outputs": [],
"source": [
"# 去重\n",
"array7 = np.unique(array6)\n",
"array7"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "79ad92e0-da87-401d-aa70-0514a5b61d0e",
"metadata": {},
"outputs": [],
"source": [
"array8 = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])\n",
"array9 = np.array([[4, 4, 4], [5, 5, 5], [6, 6, 6]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b099b5ee-d7df-4864-89f3-bb1c7a748a59",
"metadata": {},
"outputs": [],
"source": [
"# 在0轴方向(垂直)堆叠 - vertical\n",
"array10 = np.vstack((array8, array9))\n",
"array10"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "204b8e34-43e3-4a4f-9e8c-8734e72a041f",
"metadata": {},
"outputs": [],
"source": [
"# 在1轴的方向堆叠 - horizontal\n",
"np.hstack((array8, array9))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3301cc02-3ecf-4516-b2ff-e0fa6a9041c7",
"metadata": {},
"outputs": [],
"source": [
"# 数组的拼接\n",
"np.concatenate((array8, array9), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c956190c-1382-4416-9ab2-09a19f5567f6",
"metadata": {},
"outputs": [],
"source": [
"# 堆叠出更高维的数组\n",
"np.stack((array8, array9), axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a956be87-65e9-4391-b2be-bfbaaab7e7dc",
"metadata": {},
"outputs": [],
"source": [
"np.stack((array8, array9), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df88bd16-64c3-40af-adba-f7faed516159",
"metadata": {},
"outputs": [],
"source": [
"# 将一个数组拆分成多个数组\n",
"np.vsplit(array10, 3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "592ffc62-2a47-48bc-8789-3cad653d2893",
"metadata": {},
"outputs": [],
"source": [
"# 追加元素\n",
"np.append(array6, [10, 11, 12])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a3355ca-1ee7-4d8f-8567-c10fed5055f6",
"metadata": {},
"outputs": [],
"source": [
"# 插入元素\n",
"np.insert(array6, 1, [10, 20])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b824b66-7471-4669-aa99-8f62a6b8cb2b",
"metadata": {},
"outputs": [],
"source": [
"array11 = np.random.randint(1, 100, 10)\n",
"array11"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2ff82b6f-9a96-40c2-a4d9-4aaec7d42e9a",
"metadata": {},
"outputs": [],
"source": [
"# 抽取元素 - 相当于布尔索引的作用\n",
"np.extract(array11 < 50, array11)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc00dac0-0b97-435c-960d-06c141de0b78",
"metadata": {},
"outputs": [],
"source": [
"# 给出一组条件和对应的处理数据的表达式,满足条件就执行对应的表达式,不满足条件取默认值\n",
"np.select([array11 < 30, array11 > 50], [array11 * 10, array11 // 10], default=100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d7b8700-a431-4da4-99c6-54eda9f065dd",
"metadata": {},
"outputs": [],
"source": [
"# 给出一个条件和两个表达式,满足条件执行表达式1,不满足条件执行表达式2\n",
"np.where(array11 < 50, array11 * 10, array11 // 10)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc672588-f36b-4951-902c-6b70ef83d1af",
"metadata": {},
"outputs": [],
"source": [
"array11"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa96b613-cef4-475e-b88c-f6ee8194ef4c",
"metadata": {},
"outputs": [],
"source": [
"# 滚动数组元素\n",
"np.roll(array11, 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bfeed7b4-d835-4a1a-9858-d27d4bc363c3",
"metadata": {},
"outputs": [],
"source": [
"np.roll(array11, -2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "52b9da9d-dd2b-4fea-984c-6d4b94efedab",
"metadata": {},
"outputs": [],
"source": [
"np.roll(array10, 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53aa8382-51e2-4634-bb9f-4ff84ddaef60",
"metadata": {},
"outputs": [],
"source": [
"np.roll(array10, 2, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f2de13b-2773-4a87-a854-ee9486dfcc0d",
"metadata": {},
"outputs": [],
"source": [
"array12 = np.arange(1, 10).reshape((3, 3))\n",
"array12"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3808483f-2811-45fd-adbb-a10bcc9d7dc6",
"metadata": {},
"outputs": [],
"source": [
"np.roll(array12, 2, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f6d8d16-2e35-4926-ba13-d5e9cf77660d",
"metadata": {},
"outputs": [],
"source": [
"np.roll(array12, 1, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29b76e3a-8414-4658-b0c6-7795f2186fbe",
"metadata": {},
"outputs": [],
"source": [
"# 替换数组元素\n",
"np.put(array11, [1, 3, 5, 7], [33, 88])\n",
"array11"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb4653ef-5c5a-4d3c-adb2-0f10d79ca6c6",
"metadata": {},
"outputs": [],
"source": [
"np.place(array11, array11 > 50, [44, 99])\n",
"array11"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03bcc628-1471-44ef-ad56-88468c08548d",
"metadata": {},
"outputs": [],
"source": [
"guido_image = plt.imread('res/guido.jpg')\n",
"guido_image.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "16fa2543-195b-47d6-8e29-2682800f91aa",
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(np.flip(guido_image, axis=0))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "259830fa-6cd3-43be-bffe-560050f795b9",
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(np.flip(guido_image, axis=1))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6059087d-85ba-4151-9af0-8870876a6b1a",
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(np.flip(guido_image, axis=2))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70ebbf72-87c9-41d8-8c46-f1eccb531206",
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(guido_image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "015548b9-819c-49ba-a13d-7777662a7414",
"metadata": {},
"outputs": [],
"source": [
"plt.imshow(guido_image.swapaxes(0, 1))"
]
},
{
"cell_type": "markdown",
"id": "7c4c00de-f16f-4aac-ae37-e0b9df0eb6c2",
"metadata": {},
"source": [
"#### 普通函数矢量化"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa07c8bc-9558-4d2c-b12c-b0e3814cbb48",
"metadata": {},
"outputs": [],
"source": [
"# 面试官:讲一讲Python语言中的装饰器\n",
"# 用一个函数去装饰另一个函数或者一个类并为其提供额外的能力(横切关注功能)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "02e4d3e6-9bc7-462e-8be4-800a1dcdc632",
"metadata": {},
"outputs": [],
"source": [
"# 面试题:写一个装饰器,如果原函数返回字符串,那么将字符串每个单词首字母大写\n",
"from functools import wraps\n",
"\n",
"\n",
"def titlize_str(func):\n",
"    \"\"\"Decorator: title-case the wrapped function's return value when it is a str.\n",
"\n",
"    Results of any other type are returned untouched.\n",
"    \"\"\"\n",
"\n",
"    @wraps(func)\n",
"    def wrapper(*args, **kwargs):\n",
"        outcome = func(*args, **kwargs)\n",
"        # str.title() only applies to strings; pass everything else through.\n",
"        return outcome.title() if isinstance(outcome, str) else outcome\n",
"\n",
"    return wrapper"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e798956c-d543-4a50-a12b-f7314b98bf40",
"metadata": {},
"outputs": [],
"source": [
"@titlize_str\n",
"def say_hello(name):\n",
"    \"\"\"Build the greeting 'hello, <name>'; the titlize_str decorator title-cases it.\"\"\"\n",
"    prefix = 'hello, '\n",
"    return prefix + name"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "82145cd0-bcb2-44b8-90ef-ac461e2120bd",
"metadata": {},
"outputs": [],
"source": [
"# 如果不使用@语法糖(便捷语法),也可以通过下面的方式应用装饰器\n",
"# say_hello = titlize_str(say_hello)\n",
"# say_hello('tom')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "108ca8a5-ec63-458b-b95b-df759b68cb51",
"metadata": {},
"outputs": [],
"source": [
"say_hello('tom')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3d846bc7-718f-4344-b90e-c7803a104887",
"metadata": {},
"outputs": [],
"source": [
"# Recover the undecorated function. getattr with a fallback keeps this cell\n",
"# re-runnable: after the first run say_hello no longer has a __wrapped__ attribute.\n",
"say_hello = getattr(say_hello, '__wrapped__', say_hello)\n",
"say_hello('tom')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ea620684-3375-4876-b097-49e81bf225c9",
"metadata": {},
"outputs": [],
"source": [
"# 优化代码的执行性能:空间换时间\n",
"from functools import lru_cache\n",
"\n",
"\n",
"@lru_cache(maxsize=128)\n",
"def fib(n):\n",
"    \"\"\"Return the n-th Fibonacci number, 1-indexed (fib(1) == fib(2) == 1).\n",
"\n",
"    Results are memoized by lru_cache (up to 128 entries), so repeated\n",
"    sub-calls are served from the cache instead of being recomputed.\n",
"\n",
"    Raises:\n",
"        ValueError: if n < 1; the recursion would otherwise never reach a base case.\n",
"    \"\"\"\n",
"    if n < 1:\n",
"        raise ValueError(f'n must be >= 1, got {n!r}')\n",
"    if n in (1, 2):\n",
"        return 1\n",
"    return fib(n - 1) + fib(n - 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f72372e-9cc9-4751-8ee7-e8d9f40727bd",
"metadata": {},
"outputs": [],
"source": [
"for i in range(1, 121):\n",
" print(i, fib(i))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "29d231a2-57cd-4786-85cd-1366f5378185",
"metadata": {},
"outputs": [],
"source": [
"# Vectorize a plain Python function with the np.vectorize decorator.\n",
"@np.vectorize\n",
"def fac(n):\n",
"    \"\"\"Factorial of a non-negative integer n, applied element-wise by np.vectorize.\n",
"\n",
"    Computed with a loop rather than by calling fac recursively: the name fac is\n",
"    bound to the np.vectorize wrapper, which (without otypes) evaluates the first\n",
"    element an extra time to infer the output type, so recursing through it\n",
"    roughly doubles the work at every level.\n",
"\n",
"    Raises:\n",
"        ValueError: if n is negative or not a whole number.\n",
"    \"\"\"\n",
"    count = int(n)\n",
"    if count != n or count < 0:\n",
"        raise ValueError(f'fac() needs a non-negative integer, got {n!r}')\n",
"    product = 1\n",
"    for factor in range(2, count + 1):\n",
"        product *= factor\n",
"    return product"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "7c04cc06-ba86-4b1b-a4e1-75e4527f6dde",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 2, 3, 4, 5, 6, 7, 8])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"temp = np.arange(1, 9)\n",
"temp"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "38e8026a-6e75-4e1b-a646-98c86736797f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 2, 6, 24, 120, 720, 5040, 40320])"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fac(temp)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "4ac8bb35-87f0-44d0-930e-3fc0c5fb63c3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([26, 68, 73, 33, 64, 54, 26, 40, 60, 36]),\n",
" array([37, 56, 65, 30, 57, 36, 61, 54, 34, 52]))"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x1 = np.random.randint(20, 80, 10)\n",
"x2 = np.random.randint(30, 70, 10)\n",
"x1, x2"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "4baced9b-fee2-4c2b-b7ca-dc5914b120b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 4, 1, 3, 1, 18, 1, 2, 2, 4])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from math import gcd, lcm\n",
"\n",
"# Give the vectorized wrapper its own name so math.gcd is not rebound\n",
"# to a different kind of object.\n",
"gcd_vec = np.vectorize(gcd)\n",
"gcd_vec(x1, x2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08c6a075-8b61-4716-92c5-126cd78108c1",
"metadata": {},
"outputs": [],
"source": [
"# Separate name: re-running this cell must not wrap an already-vectorized lcm.\n",
"lcm_vec = np.vectorize(lcm)\n",
"lcm_vec(x1, x2)"
]
},
{
"cell_type": "markdown",
"id": "859bba4b-a0cf-4140-a8de-b2f3fffcd355",
"metadata": {},
"source": [
"### 广播机制\n",
"\n",
"两个形状(shape属性)不一样的数组如果要做运算,要先通过广播机制使其形状一样才能运算。\n",
"\n",
"如果要执行广播机制使得两个数组形状一样,需要满足以下两个条件其中一个:\n",
"\n",
"1. 两个数组后缘维度(shape属性从后往前看对应的部分)相同。\n",
"2. 两个数组后缘维度不同,但是其中一方为1。"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3339c56a-b68c-401e-a27c-134be60ccf14",
"metadata": {},
"outputs": [],
"source": [
"temp1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]])\n",
"temp2 = np.array([1, 2, 3])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4ecc9498-792c-4de1-a120-585003b19087",
"metadata": {},
"outputs": [],
"source": [
"temp1 + temp2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2cb20c46-3f87-4346-80ab-7e2786ca1475",
"metadata": {},
"outputs": [],
"source": [
"temp3 = np.array([[1], [2], [3], [4]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58932222-bb83-43b7-8cef-b9574898dbb5",
"metadata": {},
"outputs": [],
"source": [
"temp1 + temp3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74b6c376-05ac-4049-a4b9-5f0614de780e",
"metadata": {},
"outputs": [],
"source": [
"temp4 = np.array([1 ,2, 3])\n",
"temp5 = np.array([[3], [2], [1]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eefe6354-4614-4dfb-aa94-3d146b770b3b",
"metadata": {},
"outputs": [],
"source": [
"temp4.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04dd44a5-6bea-41eb-aad0-b0a18d64a512",
"metadata": {},
"outputs": [],
"source": [
"temp5.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "764eb0c3-991d-4a5f-a4eb-7da2c0b445bd",
"metadata": {},
"outputs": [],
"source": [
"temp4 + temp5"
]
},
{
"cell_type": "markdown",
"id": "8f1022cb-c07a-4149-aafd-9f53d235da4f",
"metadata": {},
"source": [
"### 矩阵"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a16c29a-088a-473b-b856-3dbd6660f9cd",
"metadata": {},
"outputs": [],
"source": [
"m1 = np.array([[1, 0, 2], [-1, 3, 1]])\n",
"m2 = np.array([[3, 1], [2, 1], [1, 0]])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "74a54196-ca95-4425-9212-62869795aed7",
"metadata": {},
"outputs": [],
"source": [
"m1.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1afbceea-782d-49c9-b6d4-d0848f3fdd99",
"metadata": {},
"outputs": [],
"source": [
"m2.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc65a37a-84eb-4aac-b63f-5625a0c15a3a",
"metadata": {},
"outputs": [],
"source": [
"m1 @ m2"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "67be1c96-a20c-4862-87cb-8579d3a303f5",
"metadata": {},
"outputs": [],
"source": [
"np.matmul(m1, m2)"
]
},
{
"cell_type": "markdown",
"id": "5fd602a8-61fc-4c5c-94a6-a930bcf6fb2f",
"metadata": {},
"source": [
"$$\n",
"\\begin{cases}\n",
"x_1 + 2x_2 + x_3 = 8 \\\\\n",
"3x_1 + 7x_2 + 2x_3 = 23 \\\\\n",
"2x_1 + 2x_2 + x_3 = 9\n",
"\\end{cases}\n",
"$$"
]
},
{
"cell_type": "markdown",
"id": "d41b4856-79c3-4f48-9d8e-58c8d6045884",
"metadata": {},
"source": [
"$$\n",
"\\boldsymbol{A} = \\begin{bmatrix}\n",
"1 & 2 & 1\\\\\n",
"3 & 7 & 2\\\\\n",
"2 & 2 & 1\n",
"\\end{bmatrix}, \\quad\n",
"\\boldsymbol{x} = \\begin{bmatrix}\n",
"x_1 \\\\\n",
"x_2\\\\\n",
"x_3\n",
"\\end{bmatrix}, \\quad\n",
"\\boldsymbol{b} = \\begin{bmatrix}\n",
"8 \\\\\n",
"23\\\\\n",
"9\n",
"\\end{bmatrix}\n",
"$$"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "edb85b44-50b3-4115-8539-5023a19bb2a1",
"metadata": {},
"outputs": [],
"source": [
"m3 = np.arange(1, 10, dtype='f8').reshape(3, 3)\n",
"m3[-1, -1] = 8\n",
"m3"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b90dbf4-05f7-43ad-a37d-20d48d03dd3a",
"metadata": {},
"outputs": [],
"source": [
"# 计算矩阵的秩\n",
"np.linalg.matrix_rank(m3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a442ab40-97a9-4cf5-8d61-bfcaa3561655",
"metadata": {},
"outputs": [],
"source": [
"# 逆矩阵 - 奇异矩阵不能求逆矩阵\n",
"# LinAlgError: Singular matrix\n",
"np.linalg.inv(m3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19be7ff8-6a71-40ad-8e37-20008c56be7a",
"metadata": {},
"outputs": [],
"source": [
"# 有唯一解的条件:系数矩阵的秩等于增广矩阵的秩,同时跟未知数的个数相同。\n",
"# 秩(rank):线性无关的行或者列的数量。\n",
"# 线性相关:一个向量可以通过其他向量做线性变换(数乘和加法)得到,那么它们就是线性相关的。"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "87043db3-e2bd-4a70-950a-d74163afc4d1",
"metadata": {},
"outputs": [],
"source": [
"A = np.array([[1, 2, 1], [3, 7, 2], [2, 2, 1]])\n",
"b = np.array([8, 23, 9]).reshape(-1, 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "479d272b-2b46-4374-93f3-54e13af52d59",
"metadata": {},
"outputs": [],
"source": [
"# 系数矩阵的秩\n",
"np.linalg.matrix_rank(A)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "faa35591-09da-4232-9b38-72ee2fb824cc",
"metadata": {},
"outputs": [],
"source": [
"# 增广矩阵的秩\n",
"np.linalg.matrix_rank(np.hstack((A, b)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8d02dee-7d86-4f9d-8e33-77b5a76784a3",
"metadata": {},
"outputs": [],
"source": [
"# 解线性方程组\n",
"np.linalg.solve(A, b)"
]
},
{
"cell_type": "markdown",
"id": "336ee288-5be1-41e5-89cb-e22f465efdd2",
"metadata": {},
"source": [
"$$\n",
"A \\cdot x = b\n",
"$$\n",
"$$\n",
"A^{-1} \\cdot A \\cdot x = A^{-1} \\cdot b\n",
"$$\n",
"$$\n",
"I \\cdot x = A^{-1} \\cdot b\n",
"$$\n",
"$$\n",
"x = A^{-1} \\cdot b\n",
"$$"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c2bd2fd-8867-4dad-8bbc-b5b175f690b7",
"metadata": {},
"outputs": [],
"source": [
"# 通过逆矩阵解线性方程组\n",
"np.linalg.inv(A) @ b"
]
},
{
"cell_type": "markdown",
"id": "b876a47b-a2ab-497a-b564-69750ddb8666",
"metadata": {},
"source": [
"#### 补充 - 用矩阵运算实现图像处理"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "95e138b5-ed1d-40c3-acbc-821b6ae8cf41",
"metadata": {},
"outputs": [],
"source": [
"# 安装opencv库\n",
"# %pip install opencv-python"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "669c59e5-a477-4bf3-b87b-7486e9d2e9ef",
"metadata": {},
"outputs": [],
"source": [
"def basic_matrix(translation):\n",
"    \"\"\"Return the 3x3 homogeneous matrix that translates by (translation[0], translation[1]).\"\"\"\n",
"    shift_x, shift_y = translation[0], translation[1]\n",
"    return np.array([\n",
"        [1, 0, shift_x],\n",
"        [0, 1, shift_y],\n",
"        [0, 0, 1],\n",
"    ])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0338ad44-3142-428f-b1b6-45e691962900",
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"\n",
"def adjust_transform_for_image(img, trans_matrix):\n",
"    \"\"\"Re-centre a 3x3 transform so that it acts about the image centre.\n",
"\n",
"    The result is T(centre) @ trans_matrix @ T(-centre), where T is basic_matrix\n",
"    and centre is (width / 2, height / 2) taken from img.shape.\n",
"    \"\"\"\n",
"    rows, cols, *_ = img.shape\n",
"    pivot = np.array([0.5 * cols, 0.5 * rows])\n",
"    move_back = basic_matrix(pivot)\n",
"    move_to_origin = basic_matrix(-pivot)\n",
"    return move_back @ trans_matrix @ move_to_origin"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "368578be-3f94-4a37-be51-c6a5dbce1512",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"\n",
"def apply_transform(img, transform, border_value=(204, 204, 204)):\n",
"    \"\"\"Warp img with cv2.warpAffine using the top two rows of a 3x3 transform.\n",
"\n",
"    The output keeps the input's width and height; the call uses linear\n",
"    interpolation and a constant border filled with border_value.\n",
"    \"\"\"\n",
"    rows, cols = img.shape[0], img.shape[1]\n",
"    affine_rows = transform[:2, :]  # first two rows of the 3x3 matrix\n",
"    return cv2.warpAffine(\n",
"        img,\n",
"        affine_rows,\n",
"        dsize=(cols, rows),\n",
"        flags=cv2.INTER_LINEAR,\n",
"        borderMode=cv2.BORDER_CONSTANT,\n",
"        borderValue=border_value,\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "28b22755-4aed-4cd2-a3a2-57bd73f8eb00",
"metadata": {},
"outputs": [],
"source": [
"def apply(img, trans_matrix):\n",
"    \"\"\"Centre trans_matrix on img, warp img with it, and return the warped image.\"\"\"\n",
"    return apply_transform(img, adjust_transform_for_image(img, trans_matrix))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1526fbcd-ad60-4d7c-892c-5c0ae2b7ef3f",
"metadata": {},
"outputs": [],
"source": [
"def scale(img, x_ratio, y_ratio):\n",
"    \"\"\"Warp img with the scaling matrix diag(x_ratio, y_ratio, 1), centred on the image.\"\"\"\n",
"    # A homogeneous scaling matrix is purely diagonal, so np.diag builds it directly.\n",
"    return apply(img, np.diag([x_ratio, y_ratio, 1]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e443448-9b6c-488b-83a5-0de7ac4fca18",
"metadata": {},
"outputs": [],
"source": [
"def rotate(img, degree):\n",
"    \"\"\"Warp img with the 2D rotation matrix for `degree` degrees, centred on the image.\"\"\"\n",
"    theta = np.deg2rad(degree)\n",
"    cos_t, sin_t = np.cos(theta), np.sin(theta)\n",
"    rotation = np.array([\n",
"        [cos_t, -sin_t, 0],\n",
"        [sin_t, cos_t, 0],\n",
"        [0, 0, 1],\n",
"    ])\n",
"    return apply(img, rotation)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2a09187e-2707-49c2-8b8d-84ffa28b6f7e",
"metadata": {},
"outputs": [],
"source": [
"def transvect(img, ratio):\n",
"    \"\"\"Warp img with a shear mapping (x offset by ratio * y), centred on the image.\"\"\"\n",
"    top_row = [1, ratio, 0]  # the only row that differs from the identity matrix\n",
"    shear = np.array([top_row, [0, 1, 0], [0, 0, 1]])\n",
"    return apply(img, shear)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9561e506-3b64-49e6-9e9d-8731a2951646",
"metadata": {},
"outputs": [],
"source": [
"scaled_img = scale(guido_image, 1.25, 0.75)\n",
"plt.imshow(scaled_img)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56cb960b-b3f9-4109-95ba-4388a2b1762e",
"metadata": {},
"outputs": [],
"source": [
"rotated_img = rotate(guido_image, -45)\n",
"plt.imshow(rotated_img)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc2ac604-0fd7-43d5-be18-407b78e46f58",
"metadata": {},
"outputs": [],
"source": [
"transvected_img = transvect(guido_image, -0.3)\n",
"plt.imshow(transvected_img)"
]
},
{
"cell_type": "markdown",
"id": "cd91252a-31d4-40d5-82ef-71f22f0bd39c",
"metadata": {},
"source": [
"#### 补充 - 用scipy处理图像"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9052864d-a32d-4cd3-9e2a-8df1654b8a67",
"metadata": {},
"outputs": [],
"source": [
"from scipy.ndimage import gaussian_filter, sobel\n",
"\n",
"# 获取灰度图\n",
"guido_image = plt.imread('res/guido.jpg')\n",
"gray_image = np.mean(guido_image, axis=2)\n",
"\n",
"plt.figure(figsize=(12, 4))\n",
"\n",
"# 灰度图\n",
"plt.subplot(1, 4, 1)\n",
"plt.imshow(gray_image, cmap=plt.cm.gray)\n",
"\n",
"# 模糊和锐化\n",
"plt.subplot(1, 4, 2)\n",
"blurred_image = gaussian_filter(gray_image, 3)\n",
"plt.imshow(blurred_image, cmap=plt.cm.gray)\n",
"\n",
"plt.subplot(1, 4, 3)\n",
"filtered_image = gaussian_filter(blurred_image, 1)\n",
"sharpen_image = blurred_image + 32 * (blurred_image - filtered_image)\n",
"plt.imshow(sharpen_image, cmap=plt.cm.gray)\n",
"\n",
"# 边缘图\n",
"plt.subplot(1, 4, 4)\n",
"# 使用索贝尔算子(邻点灰度加权差)进行边缘检测\n",
"edge_image = sobel(gray_image)\n",
"plt.imshow(edge_image, cmap=plt.cm.gray)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dbbc6edf-c643-4cb4-a98b-05e806413478",
"metadata": {},
"outputs": [],
"source": [
"# Import the module rather than the bare names: `from scipy.ndimage import rotate`\n",
"# would rebind this notebook's own rotate(img, degree) helper defined earlier.\n",
"from scipy import ndimage\n",
"\n",
"plt.figure(figsize=(12, 4))\n",
"\n",
"# Rotation with reshape=True (output array resized to hold the whole rotated image)\n",
"plt.subplot(1, 3, 1)\n",
"rotated_image = ndimage.rotate(guido_image, -16, reshape=True)\n",
"plt.imshow(rotated_image)\n",
"\n",
"# Rotation with reshape=False (output keeps the input's shape)\n",
"plt.subplot(1, 3, 2)\n",
"rotated_image = ndimage.rotate(guido_image, -16, reshape=False)\n",
"plt.imshow(rotated_image)\n",
"\n",
"# Zoom: factor 0.8 along axis 0, 1.25 along axis 1, third axis unchanged\n",
"plt.subplot(1, 3, 3)\n",
"scaled_image = ndimage.zoom(guido_image, zoom=(0.8, 1.25, 1))\n",
"plt.imshow(scaled_image)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "331862d6-60cd-442f-97a4-bbbfbabfc3fa",
"metadata": {},
"source": [
"#### 补充 - 视频流人脸识别"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "383fa323-db82-4a8b-a13b-9cbe9b8c48d3",
"metadata": {},
"outputs": [],
"source": [
"# 安装face_recognition库\n",
"# %pip install face_recognition"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c1cf7b8-9d89-43d4-8954-616bce780183",
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import face_recognition\n",
"# from PIL import Image\n",
"\n",
"plt.figure(figsize=(12, 8))\n",
"\n",
"image = face_recognition.load_image_file('res/Solvay.jpg')\n",
"locations = face_recognition.face_locations(image)\n",
"for location in locations:\n",
" top, right, bottom, left = location\n",
" # Image.fromarray(image[top:bottom, left:right]).show()\n",
" cv2.rectangle(image, (left, top), (right, bottom), (255, 0, 0), 2)\n",
"plt.imshow(image)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fa76d9c2-c8c3-4cec-8c91-b6b54fe6e32e",
"metadata": {},
"outputs": [],
"source": [
"# import cv2\n",
"# import face_recognition\n",
"# import numpy as np\n",
"\n",
"# # 获取摄像头\n",
"# video_capture = cv2.VideoCapture(0)\n",
"\n",
"# # 加载图片获取脸部特征\n",
"# obama_image = face_recognition.load_image_file(\"res/obama.jpg\")\n",
"# obama_face_encoding = face_recognition.face_encodings(obama_image)[0]\n",
"# luohao_image = face_recognition.load_image_file(\"res/luohao.png\")\n",
"# luohao_face_encoding = face_recognition.face_encodings(luohao_image)[0]\n",
"# guido_image = face_recognition.load_image_file(\"res/guido.jpg\")\n",
"# guido_face_encoding = face_recognition.face_encodings(guido_image)[0]\n",
"\n",
"# # 保存脸部特征和对应的名字\n",
"# known_face_encodings = [\n",
"# obama_face_encoding,\n",
"# luohao_face_encoding,\n",
"# guido_face_encoding\n",
"# ]\n",
"# known_face_names = [\n",
"# \"Barack\",\n",
"# \"Hao\",\n",
"# \"Guido\"\n",
"# ]\n",
"\n",
"# face_locations = []\n",
"# face_encodings = []\n",
"# face_names = []\n",
"# process_this_frame = True\n",
"\n",
"# while True:\n",
"# # 从视频中读取一帧数据\n",
"# ret, frame = video_capture.read()\n",
"\n",
"# # 调整为原始尺寸的四分之一(加速处理)\n",
"# small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)\n",
"\n",
"# # BGR转成RGB\n",
"# rgb_small_frame = small_frame[:, :, ::-1]\n",
"\n",
"# if process_this_frame:\n",
"# # 找到所有的人脸位置和脸部特征保存在列表中\n",
"# face_locations = face_recognition.face_locations(rgb_small_frame)\n",
"# face_encodings = face_recognition.face_encodings(rgb_small_frame, face_locations)\n",
"\n",
"# face_names = []\n",
"# for face_encoding in face_encodings:\n",
"# # 比较脸部特征\n",
"# matches = face_recognition.compare_faces(known_face_encodings, face_encoding)\n",
"# name = \"Unknown\"\n",
"\n",
"# # 通过距离判定最佳匹配并获取对应的名字\n",
"# face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)\n",
"# best_match_index = np.argmin(face_distances)\n",
"# if matches[best_match_index]:\n",
"# name = known_face_names[best_match_index]\n",
"\n",
"# face_names.append(name)\n",
"\n",
"# process_this_frame = not process_this_frame\n",
"\n",
"# # 显示结果\n",
"# for (top, right, bottom, left), name in zip(face_locations, face_names):\n",
"# # 恢复正常的尺寸\n",
"# top, right, bottom, left = top * 4, right * 4, bottom * 4, left * 4\n",
"# # 绘制一个标识人脸的矩形框\n",
"# cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)\n",
"# # 绘制一个填写名字的矩形框\n",
"# cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)\n",
"# # 绘制识别出的人脸对应的名字\n",
"# cv2.putText(frame, name, (left + 6, bottom - 6), cv2.FONT_HERSHEY_DUPLEX, 1.0, (255, 255, 255), 1)\n",
" \n",
"# cv2.imshow('Video', frame)\n",
" \n",
"# # 按键盘上的q键退出窗口 \n",
"# if cv2.waitKey(1) & 0xFF == ord('q'):\n",
"# break\n",
"\n",
"# video_capture.release()\n",
"# cv2.destroyAllWindows()"
]
},
{
"cell_type": "markdown",
"id": "b6989fb7-a3ab-4889-80be-f4ecce033e5c",
"metadata": {},
"source": [
"### 多项式"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0c719ca9-de74-4c2e-aaf2-d0a9c833f512",
"metadata": {},
"outputs": [],
"source": [
"# NumPy老版本用poly1d表示多项式\n",
"p1 = np.poly1d([3, 0, 2, 1])\n",
"p2 = np.poly1d([1, 2, 3])\n",
"print(p1)\n",
"print(p2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6aab279-fa9f-4d4e-b597-b3c51b478777",
"metadata": {},
"outputs": [],
"source": [
"# 多项式加法\n",
"print(p1 + p2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "375a1989-7231-432c-bf33-06fce490cacf",
"metadata": {},
"outputs": [],
"source": [
"# 多项式乘法\n",
"print(p1 * p2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4df5662a-c0bc-434d-b53f-623e09b51a76",
"metadata": {},
"outputs": [],
"source": [
"# 令x=2,计算多项式的值\n",
"p2(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df150df6-9904-45b7-8704-dbb033552dcb",
"metadata": {},
"outputs": [],
"source": [
"# 求导\n",
"print(p1.deriv())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d2d3002c-59d6-4e00-b00f-69f3f6ac6609",
"metadata": {},
"outputs": [],
"source": [
"# 求不定积分\n",
"print(p1.integ())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0965b9e9-42fc-425c-90cb-d265e3f6e42f",
"metadata": {},
"outputs": [],
"source": [
"p3 = np.poly1d([1, 3, 2])\n",
"print(p3)\n",
"# 令多项式等于0,求解x\n",
"print(p3.roots)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5bda2d2-0d61-46db-ac48-0f7ec3438660",
"metadata": {},
"outputs": [],
"source": [
"type(p3)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a71c4799-183e-4096-9903-7edf9464feb7",
"metadata": {},
"outputs": [],
"source": [
"from numpy.polynomial import Polynomial\n",
"\n",
"# NumPy新版本用Polynomial表示多项式\n",
"p1 = Polynomial([1, 2, 0, 3])\n",
"print(p1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0002c6e3-697b-425c-a17c-802e10f07981",
"metadata": {},
"outputs": [],
"source": [
"print(p1.deriv())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1f9dde21-0854-4ae6-9703-9599a4204003",
"metadata": {},
"outputs": [],
"source": [
"print(p1.integ())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eb191b28-fb2c-460b-9c87-0041b128ba16",
"metadata": {},
"outputs": [],
"source": [
"# 最高次项\n",
"p1.degree()"
]
},
{
"cell_type": "markdown",
"id": "2d6c3103-c5b0-413f-b2de-324b0394e3ae",
"metadata": {},
"source": [
"### 最小二乘解"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7163cb46-44bb-487c-8dd2-7c530574ecc0",
"metadata": {},
"outputs": [],
"source": [
"# 每月收入\n",
"x = np.array([3200, 4811, 5386, 5564, 6120, 6691, 6906, 7483, 7587, 7890,\n",
" 8090, 8300, 8650, 8835, 8975, 9070, 9100, 9184, 9247, 9313, \n",
" 9465, 9558, 9853, 9938, 10020, 10242, 10343, 10731, 10885, 10990, \n",
" 11100, 11227, 11313, 11414, 11630, 11806, 11999, 12038, 12400, 12547, \n",
" 12890, 13050, 13360, 13850, 14890, 14990, 15500, 16899, 17010, 19880])\n",
"# 每月网购支出\n",
"y = np.array([1761, 882, 1106, 182, 1532, 1978, 2174, 2117, 2134, 1924, \n",
" 2207, 2876, 2617, 2683, 3054, 3277, 3345, 3462, 3401, 3591,\n",
" 3596, 3671, 3829, 3907, 3852, 4288, 4359, 4099, 4300, 4367,\n",
" 5019, 4873, 4674, 5174, 4666, 5797, 5782, 5451, 5487, 5448,\n",
" 6002, 6439, 6309, 6045, 5935, 6928, 7356, 6682, 6672, 6582])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a68a1d0d-0dc9-4437-bed9-6e9aff589df1",
"metadata": {},
"outputs": [],
"source": [
"# 定性分析 - 散点图\n",
"plt.scatter(x, y)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "89030441-eefd-4a02-ad41-ba154dbf87b2",
"metadata": {},
"outputs": [],
"source": [
"from scipy import stats\n",
"\n",
"# 夏皮洛检验(正态性判定)\n",
"stats.shapiro(x), stats.shapiro(y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "93573426-e1c0-4025-8b8f-2d83d69e103c",
"metadata": {},
"outputs": [],
"source": [
"# 定量分析 - 相关系数 - correlation coefficient\n",
"# 皮尔逊相关系数(标准化的协方差 - [-1, 1])\n",
"# 1. 连续值且成对出现\n",
"# 2. 没有异常值\n",
"# 3. 来自于正态总体\n",
"np.corrcoef(x, y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b6e47de-8e3b-4be4-a5e4-f1d95649e6f9",
"metadata": {},
"outputs": [],
"source": [
"# 计算皮尔逊相关系数\n",
"stats.pearsonr(x, y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8e53b1e-b2b1-4222-a69a-dcbca0041d0e",
"metadata": {},
"outputs": [],
"source": [
"# Lookup table income -> spending; incomes that repeat in x would collapse into a single entry,\n",
"# so the length shown below doubles as a check that no observation was lost.\n",
"history_data = dict(zip(x, y))\n",
"len(history_data)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1530478e-ec70-4f87-828f-1d6798c53c5e",
"metadata": {},
"outputs": [],
"source": [
"data = np.random.randint(1, 100, 15).tolist()\n",
"data"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7d246563-86fd-418f-b058-4aae08d9b9c1",
"metadata": {},
"outputs": [],
"source": [
"import heapq\n",
"\n",
"# 通过堆(heap)结构快速的找到TopN元素\n",
"print(heapq.nsmallest(3, data))\n",
"print(heapq.nlargest(5, data))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "866f9fd9-58aa-4c03-9415-99aa236958d4",
"metadata": {},
"outputs": [],
"source": [
"# 目标:因为月收入和网购支出之间有强相关关系,所以我们可以通过月收入预测网购支出\n",
"# 方法1:输入一个月收入,找到跟这个收入最接近的N条数据,用它们的平均值预测对应的网购支出\n",
"# KNN - k最近邻算法(找到k个最近的邻居,用这k个邻居的数据来做出预测)\n",
"import heapq\n",
"\n",
"\n",
"def predicate_by_knn(income, k=5):\n",
" \"\"\"KNN算法\"\"\"\n",
" keys = heapq.nsmallest(k, history_data, key=lambda x: (x - income) ** 2)\n",
" return np.mean([history_data[key] for key in keys]).round(2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "399b9b6a-6625-4dc2-a09c-c0699c538a46",
"metadata": {},
"outputs": [],
"source": [
"predicate_by_knn(12800)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "db0ee4bf-83f8-489f-a543-80854571da60",
"metadata": {},
"outputs": [],
"source": [
"predicate_by_knn(6800)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e514003-c143-4823-91eb-57f7608f6137",
"metadata": {},
"outputs": [],
"source": [
"predicate_by_knn(20000, k=3)"
]
},
{
"cell_type": "markdown",
"id": "aa44d3ad-aa3f-44db-889a-8a69b9a9be65",
"metadata": {},
"source": [
"回归模型:\n",
"$$ Y = aX + b $$\n",
"\n",
"损失函数:\n",
"$$ MSE = \\frac{1}{N} \\sum_{i=1}^{N} (\\hat{y}_i - y_i)^2 $$"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "032eb740-ac9a-4526-9eb4-b6770907b07a",
"metadata": {},
"outputs": [],
"source": [
"# MSE - Mean Squared Error\n",
"def get_loss(a, b):\n",
"    \"\"\"Return the mean squared error of the line a * x + b against y.\n",
"\n",
"    Reads the module-level arrays ``x`` and ``y``.\n",
"    \"\"\"\n",
"    residuals = a * x + b - y\n",
"    return np.square(residuals).mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0d56ccc4-5a0d-4b3a-89ab-01cf690c4fba",
"metadata": {},
"outputs": [],
"source": [
"# Monte Carlo simulation: draw random (a, b) pairs and keep the pair with the lowest loss\n",
"import random\n",
"\n",
"# Fixed seed so that Restart & Run All reproduces the same search and the same fit.\n",
"random.seed(42)\n",
"\n",
"min_loss = np.inf\n",
"ba, bb = None, None\n",
"\n",
"for _ in range(10000):\n",
"    # Candidate slope in [0.5, 1.0) and candidate intercept in [-2000, -1000).\n",
"    a = random.random() * 0.5 + 0.5\n",
"    b = random.random() * 1000 - 2000\n",
"    curr_loss = get_loss(a, b)\n",
"    if curr_loss < min_loss:\n",
"        min_loss = curr_loss\n",
"        ba, bb = a, b\n",
"        # Trace each improvement so the convergence is visible in the output.\n",
"        print(min_loss)\n",
"print(ba, bb)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "af5f7d02-f921-4712-82e3-1ce43c86c708",
"metadata": {},
"outputs": [],
"source": [
"# Draw the line found by the random search on top of the raw observations.\n",
"ax = plt.gca()\n",
"ax.scatter(x, y)\n",
"ax.plot(x, ba * x + bb, color='r', linewidth=4)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "207c2da0-371f-40ea-b19f-f56cca8a7d30",
"metadata": {},
"outputs": [],
"source": [
"def predicate_by_regression(income):\n",
"    \"\"\"Predict online spending for an income with the fitted line.\n",
"\n",
"    Uses the module-level slope ``ba`` and intercept ``bb``.\n",
"\n",
"    :param income: monthly income to predict for\n",
"    :return: ba * income + bb, rounded to 2 decimals\n",
"    \"\"\"\n",
"    predicted = ba * income + bb\n",
"    return round(predicted, 2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9e3af32-654c-4704-9f32-cfe41f067d06",
"metadata": {},
"outputs": [],
"source": [
"predicate_by_regression(6800)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5133079a-59f8-4435-af1e-fb5bc13c457d",
"metadata": {},
"outputs": [],
"source": [
"predicate_by_regression(12800)"
]
},
{
"cell_type": "markdown",
"id": "200b9821-b8c8-41df-ae8e-d96bf5049415",
"metadata": {},
"source": [
"将回归模型代入损失函数:\n",
"$$ f(a, b) = \\frac {1} {N} \\sum_{i=1}^{N}(y_i - (ax_i + b))^2 $$\n",
"\n",
"如何让$f(a, b)$取到最小值???\n",
"\n",
"求偏导数,并令其等于0。\n",
"$$ \\frac {\\partial {f(a, b)}} {\\partial {a}} = \\frac {2} {N} \\sum_{i=1}^{N}(-x_iy_i + x_i^2a + x_ib) = 0 $$ \n",
"$$ \\frac {\\partial {f(a, b)}} {\\partial {b}} = \\frac {2} {N} \\sum_{i=1}^{N}(-y_i + x_ia + b) = 0 $$\n",
"\n",
"求解得到:\n",
"$$a = \\frac{\\sum(x_{i} - \\bar{x})(y_{i} - \\bar{y})}{\\sum(x_{i} - \\bar{x})^{2}}$$\n",
"$$b = \\bar{y} - a\\bar{x}$$"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ab79db5-85c7-44ad-8ffa-2684fb499f7a",
"metadata": {},
"outputs": [],
"source": [
"x_bar, y_bar = np.mean(x), np.mean(y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "332777b5-bc3f-44d2-bd54-f8e9a19497aa",
"metadata": {},
"outputs": [],
"source": [
"# Closed-form least-squares solution: slope from the centred sums, intercept from the means.\n",
"ba = (x - x_bar).dot(y - y_bar) / np.square(x - x_bar).sum()\n",
"bb = y_bar - ba * x_bar\n",
"ba, bb"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "795f6a40-136a-4772-9e2b-5798022e408d",
"metadata": {},
"outputs": [],
"source": [
"# Draw the least-squares line on top of the raw observations.\n",
"ax = plt.gca()\n",
"ax.scatter(x, y)\n",
"ax.plot(x, ba * x + bb, color='r', linewidth=4)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2bfac239-7bf0-4784-a124-bbfdb4cdafe8",
"metadata": {},
"outputs": [],
"source": [
"# 拟合出一个线性回归模型\n",
"np.polyfit(x, y, deg=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9d5adc92-73e2-40df-843b-02efb28c6ae3",
"metadata": {},
"outputs": [],
"source": [
"# 拟合出一个多项式回归模型\n",
"a, b, c = np.polyfit(x, y, deg=2)\n",
"a, b, c"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c42ee939-e4b9-479e-ad73-66e806e29c6c",
"metadata": {},
"outputs": [],
"source": [
"# Draw the fitted quadratic curve on top of the raw observations.\n",
"ax = plt.gca()\n",
"ax.scatter(x, y)\n",
"ax.plot(x, a * x ** 2 + b * x + c, color='r', linewidth=4)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3363f359-3615-4ffa-a040-41e39c83b38f",
"metadata": {},
"outputs": [],
"source": [
"Polynomial.fit(x, y, deg=1).convert().coef"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}