{ "cells": [ { "cell_type": "markdown", "id": "6fc07f67-318b-4d79-8d4e-4eb8a2c61be2", "metadata": {}, "source": [ "## NumPy进阶" ] }, { "cell_type": "code", "execution_count": 1, "id": "a9d74703-47d5-44f4-8566-eb7d5476c792", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "plt.rcParams['font.sans-serif'].insert(0, 'SimHei')\n", "plt.rcParams['axes.unicode_minus'] = False" ] }, { "cell_type": "code", "execution_count": 2, "id": "d139c565-6bf2-4bf6-9d66-d2755b29d1db", "metadata": {}, "outputs": [], "source": [ "%config InlineBackend.figure_format = 'svg'\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "id": "d41a57e5-6009-455b-aff9-4f96682423fc", "metadata": {}, "source": [ "### NumPy中的函数\n", "\n", "#### 通用一元函数" ] }, { "cell_type": "code", "execution_count": 3, "id": "5f881886-8aca-40cb-a9f3-4514e28b8fe3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1., 2., 3., inf, nan, -inf, nan, 5.])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# inf - infinity\n", "# nan - not a number\n", "array1 = np.array([1, 2, 3, np.inf, np.nan, -np.inf, np.nan, 5])\n", "array1" ] }, { "cell_type": "code", "execution_count": 4, "id": "b6e891cc-035c-4e98-9406-1e78e3623e76", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "dtype('float64')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array1.dtype" ] }, { "cell_type": "code", "execution_count": 5, "id": "674995a2-e50a-45a6-b1ad-7a9f88331dd0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([False, False, False, False, True, False, True, False])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.isnan(array1)" ] }, { "cell_type": "code", "execution_count": 6, "id": "358641bc-510c-4f1b-9df7-5dd54de47978", "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"array([ 1., 2., 3., inf, -inf, 5.])" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array1[~np.isnan(array1)]" ] }, { "cell_type": "code", "execution_count": 7, "id": "20030d7d-822e-45c7-b962-3aaf706e133c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ True, True, True, False, False, False, False, True])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.isfinite(array1)" ] }, { "cell_type": "code", "execution_count": 8, "id": "357cc22a-7acf-46ee-9523-631134dc8eae", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1., 2., 3., 5.])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array1[np.isfinite(array1)]" ] }, { "cell_type": "code", "execution_count": 9, "id": "c38f23a4-7d72-4ce4-9bc4-9f8cd9433fa8", "metadata": {}, "outputs": [], "source": [ "x = np.linspace(0.5, 10, 72)\n", "y1 = np.sin(x)\n", "y2 = np.log2(x)\n", "y3 = np.sqrt(x)" ] }, { "cell_type": "code", "execution_count": 10, "id": "08a272bc-8765-455b-b0d0-700872071cf4", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# 定制画布\n", "plt.figure(figsize=(8, 4))\n", "# 绘制折线图\n", "plt.plot(x, y1, marker='.', label='$y=sin(x)$')\n", "plt.plot(x, y2, label='$y=log_{2}x$', linewidth=3, color='#9c9c9c')\n", "plt.plot(x, y3, label='$y=\\sqrt{x}$', linestyle='-.', linewidth=0.5)\n", "# 显示图例\n", "plt.legend(loc='center right')\n", "# 显示图表\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "0b292f56-dab7-469e-89ed-0fb2114902aa", "metadata": {}, "source": [ "#### 通用二元函数" ] }, { "cell_type": "code", "execution_count": 11, "id": "8b67932a-481e-4e2d-9d83-00994a01d959", "metadata": {}, "outputs": [], "source": [ "array2 = np.array([0.1 + 0.2, 0.1 + 0.2 + 0.3])\n", "array3 = np.array([0.3, 0.6])" ] }, { "cell_type": "code", "execution_count": 12, "id": "23581a64-7b02-4f3f-8a5f-ea49ec20e48a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([False, False])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array2 == array3" ] }, { "cell_type": "code", "execution_count": 13, "id": "ddcb612c-c7aa-44c6-b8d3-1ee123fba534", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.all(array2 == array3)" ] }, { "cell_type": "code", "execution_count": 14, "id": "40d454ad-8c60-4132-8dde-10726180e552", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 比较两个数组元素是否（几乎）完全相等 - 有误差容忍度\n", "np.allclose(array2, array3)" ] }, { "cell_type": "code", "execution_count": 15, "id": "f1aab287-9d50-4b32-94fa-26d7f183fde3", "metadata": {}, "outputs": [], "source": [ "array4 = np.array([1, 2, 3, 4, 5, 6])\n", "array5 = np.array([2, 4, 6, 8, 10])" ] }, { "cell_type": "code", "execution_count": 16, "id": "ea25f2ad-e007-486b-a402-eb3436c9346c", "metadata": {}, "outputs": [ { "data": { 
"text/plain": [ "array([2, 4, 6])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 交集\n", "np.intersect1d(array4, array5)" ] }, { "cell_type": "code", "execution_count": 17, "id": "e4d2116c-c895-4597-95dc-fda67a1c99a8", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1, 2, 3, 4, 5, 6, 8, 10])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 并集\n", "np.union1d(array4, array5)" ] }, { "cell_type": "code", "execution_count": 18, "id": "5348c4f2-4222-4904-bd07-a3c85e62e4c7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 3, 5])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 差集\n", "np.setdiff1d(array4, array5)" ] }, { "cell_type": "code", "execution_count": 19, "id": "9cd3f3e5-a986-469f-97ba-c739aa4b8577", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1, 3, 5, 8, 10])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 对称差\n", "np.setxor1d(array4, array5)" ] }, { "cell_type": "code", "execution_count": 20, "id": "78eb3a98-5992-452e-ab13-eaf578cab7a0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([False, True, False, True, False, True])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 成员运算\n", "# np.in1d(array4, array5)\n", "np.isin(array4, array5)" ] }, { "cell_type": "code", "execution_count": 21, "id": "c7bbc624-dfdc-41d9-bfa2-7a4f3a387bce", "metadata": {}, "outputs": [], "source": [ "# 杰卡德相似度\n", "user_a = np.array(['平板电脑', '尿不湿', '手机', '键盘', '手机支架', '奶瓶', '婴儿辅食', '基围虾', '巴沙鱼', '生抽', '沙拉酱'])\n", "user_b = np.array(['平板电脑', '键盘', '充电宝', '补光灯', '生抽', '散热器', '笔记本电脑', '双肩包', '登山杖', '露营帐篷', '睡袋'])\n", "user_c = np.array(['沐浴露', '维C泡腾片', '牛奶', '尿不湿', '平板电脑', '奶瓶', '婴儿辅食', '手机', '磨牙棒', '生抽', '基围虾'])" ] }, { "cell_type": "code", "execution_count": 22, "id": 
"c4132979-1ef5-4e2b-93a4-2d6bfc66f38b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.15789473684210525" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.intersect1d(user_a, user_b).size / np.union1d(user_a, user_b).size" ] }, { "cell_type": "code", "execution_count": 23, "id": "46dda506-908b-405a-8e9b-c14090da05b7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4666666666666667" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.intersect1d(user_a, user_c).size / np.union1d(user_a, user_c).size" ] }, { "cell_type": "code", "execution_count": 24, "id": "fb5435f8-b8d4-4b37-88fb-13149a62660e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['巴沙鱼', '手机支架', '沙拉酱', '键盘'], dtype=' 50], [array11 * 10, array11 // 10], default=100)" ] }, { "cell_type": "code", "execution_count": 44, "id": "4d7b8700-a431-4da4-99c6-54eda9f065dd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 60, 260, 440, 10, 7, 260, 150, 430, 9, 7])" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 给出一个条件和两个表达式，满足条件执行表达式1，不满足条件执行表达式2\n", "np.where(array11 < 50, array11 * 10, array11 // 10)" ] }, { "cell_type": "code", "execution_count": 45, "id": "bc672588-f36b-4951-902c-6b70ef83d1af", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 6, 26, 44, 1, 77, 26, 15, 43, 93, 72])" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array11" ] }, { "cell_type": "code", "execution_count": 46, "id": "aa96b613-cef4-475e-b88c-f6ee8194ef4c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([93, 72, 6, 26, 44, 1, 77, 26, 15, 43])" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 滚动数组元素\n", "np.roll(array11, 2)" ] }, { "cell_type": "code", "execution_count": 47, "id": 
"bfeed7b4-d835-4a1a-9858-d27d4bc363c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([44, 1, 77, 26, 15, 43, 93, 72, 6, 26])" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.roll(array11, -2)" ] }, { "cell_type": "code", "execution_count": 48, "id": "52b9da9d-dd2b-4fea-984c-6d4b94efedab", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[6, 6, 1],\n", " [1, 1, 2],\n", " [2, 2, 3],\n", " [3, 3, 4],\n", " [4, 4, 5],\n", " [5, 5, 6]])" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.roll(array10, 2)" ] }, { "cell_type": "code", "execution_count": 49, "id": "53aa8382-51e2-4634-bb9f-4ff84ddaef60", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[5, 5, 5],\n", " [6, 6, 6],\n", " [1, 1, 1],\n", " [2, 2, 2],\n", " [3, 3, 3],\n", " [4, 4, 4]])" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.roll(array10, 2, axis=0)" ] }, { "cell_type": "code", "execution_count": 50, "id": "5f2de13b-2773-4a87-a854-ee9486dfcc0d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 2, 3],\n", " [4, 5, 6],\n", " [7, 8, 9]])" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "array12 = np.arange(1, 10).reshape((3, 3))\n", "array12" ] }, { "cell_type": "code", "execution_count": 51, "id": "3808483f-2811-45fd-adbb-a10bcc9d7dc6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[4, 5, 6],\n", " [7, 8, 9],\n", " [1, 2, 3]])" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.roll(array12, 2, axis=0)" ] }, { "cell_type": "code", "execution_count": 52, "id": "7f6d8d16-2e35-4926-ba13-d5e9cf77660d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[3, 1, 2],\n", " [6, 4, 5],\n", " [9, 7, 8]])" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "np.roll(array12, 1, axis=1)" ] }, { "cell_type": "code", "execution_count": 53, "id": "29b76e3a-8414-4658-b0c6-7795f2186fbe", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 6, 33, 44, 88, 77, 33, 15, 88, 93, 72])" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 替换数组元素\n", "np.put(array11, [1, 3, 5, 7], [33, 88])\n", "array11" ] }, { "cell_type": "code", "execution_count": 54, "id": "eb4653ef-5c5a-4d3c-adb2-0f10d79ca6c6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 6, 33, 44, 44, 99, 33, 15, 44, 99, 44])" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.place(array11, array11 > 50, [44, 99])\n", "array11" ] }, { "cell_type": "code", "execution_count": 55, "id": "03bcc628-1471-44ef-ad56-88468c08548d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(750, 500, 3)" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "guido_image = plt.imread('res/guido.jpg')\n", "guido_image.shape" ] }, { "cell_type": "code", "execution_count": 56, "id": "16fa2543-195b-47d6-8e29-2682800f91aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(np.flip(guido_image, axis=0))" ] }, { "cell_type": "code", "execution_count": 57, "id": "259830fa-6cd3-43be-bffe-560050f795b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(np.flip(guido_image, axis=1))" ] }, { "cell_type": "code", "execution_count": 58, "id": "6059087d-85ba-4151-9af0-8870876a6b1a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(np.flip(guido_image, axis=2))" ] }, { "cell_type": "code", "execution_count": 59, "id": "70ebbf72-87c9-41d8-8c46-f1eccb531206", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(guido_image)" ] }, { "cell_type": "code", "execution_count": 60, "id": "015548b9-819c-49ba-a13d-7777662a7414", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.imshow(guido_image.swapaxes(0, 1))" ] }, { "cell_type": "markdown", "id": "7c4c00de-f16f-4aac-ae37-e0b9df0eb6c2", "metadata": {}, "source": [ "#### 普通函数矢量化" ] }, { "cell_type": "code", "execution_count": 61, "id": "29d231a2-57cd-4786-85cd-1366f5378185", "metadata": {}, "outputs": [], "source": [ "# 通过vectorize装饰器将普通函数做矢量化处理\n", "@np.vectorize\n", "def fac(n):\n", " if n == 0:\n", " return 1\n", " return n * fac(n - 1)" ] }, { "cell_type": "code", "execution_count": 62, "id": "7c04cc06-ba86-4b1b-a4e1-75e4527f6dde", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([1, 2, 3, 4, 5, 6, 7, 8])" ] }, "execution_count": 62, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp = np.arange(1, 9)\n", "temp" ] }, { "cell_type": "code", "execution_count": 63, "id": "38e8026a-6e75-4e1b-a646-98c86736797f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 1, 2, 6, 24, 120, 720, 5040, 40320])" ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fac(temp)" ] }, { "cell_type": "code", "execution_count": 64, "id": "4ac8bb35-87f0-44d0-930e-3fc0c5fb63c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([24, 70, 79, 26, 41, 53, 56, 59, 72, 21]),\n", " array([63, 56, 32, 59, 51, 60, 62, 58, 67, 59]))" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x1 = np.random.randint(20, 80, 10)\n", "x2 = np.random.randint(30, 70, 10)\n", "x1, x2" ] }, { "cell_type": "code", "execution_count": 65, "id": "4baced9b-fee2-4c2b-b7ca-dc5914b120b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 3, 14, 1, 1, 1, 1, 2, 1, 1, 1])" ] }, "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from math import gcd, lcm\n", "\n", "gcd = np.vectorize(gcd)\n", "gcd(x1, x2)" ] }, { "cell_type": "code", "execution_count": 66, "id": 
"08c6a075-8b61-4716-92c5-126cd78108c1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 504, 280, 2528, 1534, 2091, 3180, 1736, 3422, 4824, 1239])" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lcm = np.vectorize(lcm)\n", "lcm(x1, x2)" ] }, { "cell_type": "markdown", "id": "859bba4b-a0cf-4140-a8de-b2f3fffcd355", "metadata": {}, "source": [ "### 广播机制\n", "\n", "两个形状（shape属性）不一样的数组如果要做运算，要先通过广播机制使其形状一样才能运算。
\n", "如果要执行广播机制使得两个数组形状一样，需要满足以下两个条件其中一个：\n", "\n", "1. 两个数组后缘维度（shape属性从后往前看对应的部分）相同。\n", "2. 两个数组后缘维度不同，但是其中一方为1。" ] }, { "cell_type": "code", "execution_count": 67, "id": "3339c56a-b68c-401e-a27c-134be60ccf14", "metadata": {}, "outputs": [], "source": [ "temp1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]])\n", "temp2 = np.array([1, 2, 3])" ] }, { "cell_type": "code", "execution_count": 68, "id": "4ecc9498-792c-4de1-a120-585003b19087", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 2, 3],\n", " [2, 3, 4],\n", " [3, 4, 5],\n", " [4, 5, 6]])" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp1 + temp2" ] }, { "cell_type": "code", "execution_count": 69, "id": "2cb20c46-3f87-4346-80ab-7e2786ca1475", "metadata": {}, "outputs": [], "source": [ "temp3 = np.array([[1], [2], [3], [4]])" ] }, { "cell_type": "code", "execution_count": 70, "id": "58932222-bb83-43b7-8cef-b9574898dbb5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 1, 1],\n", " [3, 3, 3],\n", " [5, 5, 5],\n", " [7, 7, 7]])" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp1 + temp3" ] }, { "cell_type": "code", "execution_count": 71, "id": "74b6c376-05ac-4049-a4b9-5f0614de780e", "metadata": {}, "outputs": [], "source": [ "temp4 = np.array([1 ,2, 3])\n", "temp5 = np.array([[3], [2], [1]])" ] }, { "cell_type": "code", "execution_count": 72, "id": "eefe6354-4614-4dfb-aa94-3d146b770b3b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3,)" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp4.shape" ] }, { "cell_type": "code", "execution_count": 73, "id": "04dd44a5-6bea-41eb-aad0-b0a18d64a512", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3, 1)" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp5.shape" ] }, { "cell_type": "code", "execution_count": 
74, "id": "764eb0c3-991d-4a5f-a4eb-7da2c0b445bd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[4, 5, 6],\n", " [3, 4, 5],\n", " [2, 3, 4]])" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "temp4 + temp5" ] }, { "cell_type": "markdown", "id": "8f1022cb-c07a-4149-aafd-9f53d235da4f", "metadata": {}, "source": [ "### 矩阵" ] }, { "cell_type": "code", "execution_count": 75, "id": "8a16c29a-088a-473b-b856-3dbd6660f9cd", "metadata": {}, "outputs": [], "source": [ "m1 = np.array([[1, 0, 2], [-1, 3, 1]])\n", "m2 = np.array([[3, 1], [2, 1], [1, 0]])" ] }, { "cell_type": "code", "execution_count": 76, "id": "74a54196-ca95-4425-9212-62869795aed7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2, 3)" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m1.shape" ] }, { "cell_type": "code", "execution_count": 77, "id": "1afbceea-782d-49c9-b6d4-d0848f3fdd99", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3, 2)" ] }, "execution_count": 77, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m2.shape" ] }, { "cell_type": "code", "execution_count": 78, "id": "bc65a37a-84eb-4aac-b63f-5625a0c15a3a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[5, 1],\n", " [4, 2]])" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m1 @ m2" ] }, { "cell_type": "code", "execution_count": 79, "id": "67be1c96-a20c-4862-87cb-8579d3a303f5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[5, 1],\n", " [4, 2]])" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.matmul(m1, m2)" ] }, { "cell_type": "markdown", "id": "5fd602a8-61fc-4c5c-94a6-a930bcf6fb2f", "metadata": {}, "source": [ "$$\n", "\\begin{cases}\n", "x_1 + 2x_2 + x_3 = 8 \\\\\n", "3x_1 + 7x_2 + 2x_3 = 23 \\\\\n", "2x_1 + 2x_2 + x_3 = 9\n", "\\end{cases}\n", "$$" ] }, { 
"cell_type": "markdown", "id": "d41b4856-79c3-4f48-9d8e-58c8d6045884", "metadata": {}, "source": [ "$$\n", "\\boldsymbol{A} = \\begin{bmatrix}\n", "1 & 2 & 1\\\\\n", "3 & 7 & 2\\\\\n", "2 & 2 & 1\n", "\\end{bmatrix}, \\quad\n", "\\boldsymbol{x} = \\begin{bmatrix}\n", "x_1 \\\\\n", "x_2\\\\\n", "x_3\n", "\\end{bmatrix}, \\quad\n", "\\boldsymbol{b} = \\begin{bmatrix}\n", "8 \\\\\n", "23\\\\\n", "9\n", "\\end{bmatrix}\n", "$$" ] }, { "cell_type": "code", "execution_count": 80, "id": "edb85b44-50b3-4115-8539-5023a19bb2a1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 2., 3.],\n", " [4., 5., 6.],\n", " [7., 8., 8.]])" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m3 = np.arange(1, 10, dtype='f8').reshape(3, 3)\n", "m3[-1, -1] = 8\n", "m3" ] }, { "cell_type": "code", "execution_count": 81, "id": "0b90dbf4-05f7-43ad-a37d-20d48d03dd3a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 计算矩阵的秩\n", "np.linalg.matrix_rank(m3)" ] }, { "cell_type": "code", "execution_count": 82, "id": "a442ab40-97a9-4cf5-8d61-bfcaa3561655", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[-2.66666667, 2.66666667, -1. ],\n", " [ 3.33333333, -4.33333333, 2. ],\n", " [-1. , 2. , -1. 
]])" ] }, "execution_count": 82, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 逆矩阵 - 奇异矩阵不能求逆矩阵\n", "# LinAlgError: Singular matrix\n", "np.linalg.inv(m3)" ] }, { "cell_type": "code", "execution_count": 83, "id": "19be7ff8-6a71-40ad-8e37-20008c56be7a", "metadata": {}, "outputs": [], "source": [ "# 线性方程组有唯一解的条件：系数矩阵的秩等于增广矩阵的秩，并且等于未知数的个数。\n", "# 秩（rank）：矩阵中线性无关的行（或列）的最大数量。\n", "# 线性相关：一组向量中如果有某个向量可以表示为其余向量的线性组合（数乘和加法），那么这组向量就是线性相关的。" ] }, { "cell_type": "code", "execution_count": 84, "id": "87043db3-e2bd-4a70-950a-d74163afc4d1", "metadata": {}, "outputs": [], "source": [ "A = np.array([[1, 2, 1], [3, 7, 2], [2, 2, 1]])\n", "b = np.array([8, 23, 9]).reshape(-1, 1)" ] }, { "cell_type": "code", "execution_count": 85, "id": "479d272b-2b46-4374-93f3-54e13af52d59", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 系数矩阵的秩\n", "np.linalg.matrix_rank(A)" ] }, { "cell_type": "code", "execution_count": 86, "id": "faa35591-09da-4232-9b38-72ee2fb824cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 增广矩阵的秩\n", "np.linalg.matrix_rank(np.hstack((A, b)))" ] }, { "cell_type": "code", "execution_count": 87, "id": "e8d02dee-7d86-4f9d-8e33-77b5a76784a3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1.],\n", " [2.],\n", " [3.]])" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 解线性方程组\n", "np.linalg.solve(A, b)" ] }, { "cell_type": "markdown", "id": "336ee288-5be1-41e5-89cb-e22f465efdd2", "metadata": {}, "source": [ "$$\n", "A \\cdot x = b\n", "$$\n", "$$\n", "A^{-1} \\cdot A \\cdot x = A^{-1} \\cdot b\n", "$$\n", "$$\n", "I \\cdot x = A^{-1} \\cdot b\n", "$$\n", "$$\n", "x = A^{-1} \\cdot b\n", "$$" ] }, { "cell_type": "code", "execution_count": 88, "id": "7c2bd2fd-8867-4dad-8bbc-b5b175f690b7",
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1.],\n", " [2.],\n", " [3.]])" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 通过逆矩阵解线性方程组\n", "np.linalg.inv(A) @ b" ] }, { "cell_type": "markdown", "id": "cd91252a-31d4-40d5-82ef-71f22f0bd39c", "metadata": {}, "source": [ "#### 补充 - 用scipy处理图像" ] }, { "cell_type": "code", "execution_count": 89, "id": "9052864d-a32d-4cd3-9e2a-8df1654b8a67", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scipy.ndimage import gaussian_filter, sobel\n", "\n", "# 获取灰度图\n", "guido_image = plt.imread('res/guido.jpg')\n", "gray_image = np.mean(guido_image, axis=2)\n", "\n", "plt.figure(figsize=(12, 4))\n", "\n", "# 灰度图\n", "plt.subplot(1, 4, 1)\n", "plt.imshow(gray_image, cmap=plt.cm.gray)\n", "\n", "# 模糊和锐化\n", "plt.subplot(1, 4, 2)\n", "blurred_image = gaussian_filter(gray_image, 3)\n", "plt.imshow(blurred_image, cmap=plt.cm.gray)\n", "\n", "plt.subplot(1, 4, 3)\n", "filtered_image = gaussian_filter(blurred_image, 1)\n", "sharpen_image = blurred_image + 32 * (blurred_image - filtered_image)\n", "plt.imshow(sharpen_image, cmap=plt.cm.gray)\n", "\n", "# 边缘图\n", "plt.subplot(1, 4, 4)\n", "# 使用索贝尔算子（邻点灰度加权差）进行边缘检测\n", "edge_image = sobel(gray_image)\n", "plt.imshow(edge_image, cmap=plt.cm.gray)\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 90, "id": "dbbc6edf-c643-4cb4-a98b-05e806413478", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from scipy.ndimage import rotate, zoom\n", "\n", "plt.figure(figsize=(12, 4))\n", "\n", "# 旋转\n", "plt.subplot(1, 3, 1)\n", "rotated_image = rotate(guido_image, -16, reshape=True)\n", "plt.imshow(rotated_image)\n", "\n", "# 旋转\n", "plt.subplot(1, 3, 2)\n", "rotated_image = rotate(guido_image, -16, reshape=False)\n", "plt.imshow(rotated_image)\n", "\n", "# 缩放\n", "plt.subplot(1, 3, 3)\n", "scaled_image = zoom(guido_image, zoom=(0.8, 1.25, 1))\n", "plt.imshow(scaled_image)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "b6989fb7-a3ab-4889-80be-f4ecce033e5c", "metadata": {}, "source": [ "### 多项式" ] }, { "cell_type": "code", "execution_count": 91, "id": "0c719ca9-de74-4c2e-aaf2-d0a9c833f512", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 3\n", "3 x + 2 x + 1\n", " 2\n", "1 x + 2 x + 3\n" ] } ], "source": [ "# NumPy老版本用poly1d表示多项式\n", "p1 = np.poly1d([3, 0, 2, 1])\n", "p2 = np.poly1d([1, 2, 3])\n", "print(p1)\n", "print(p2)" ] }, { "cell_type": "code", "execution_count": 92, "id": "e6aab279-fa9f-4d4e-b597-b3c51b478777", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 3 2\n", "3 x + 1 x + 4 x + 4\n" ] } ], "source": [ "# 多项式加法\n", "print(p1 + p2)" ] }, { "cell_type": "code", "execution_count": 93, "id": "375a1989-7231-432c-bf33-06fce490cacf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 5 4 3 2\n", "3 x + 6 x + 11 x + 5 x + 8 x + 3\n" ] } ], "source": [ "# 多项式乘法\n", "print(p1 * p2)" ] }, { "cell_type": "code", "execution_count": 94, "id": "4df5662a-c0bc-434d-b53f-623e09b51a76", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 令x=2，计算多项式的值\n", "p2(2)" ] }, { "cell_type": "code", "execution_count": 95, "id": "df150df6-9904-45b7-8704-dbb033552dcb", "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2\n", "9 x + 2\n" ] } ], "source": [ "# 求导\n", "print(p1.deriv())" ] }, { "cell_type": "code", "execution_count": 96, "id": "d2d3002c-59d6-4e00-b00f-69f3f6ac6609", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 4 2\n", "0.75 x + 1 x + 1 x\n" ] } ], "source": [ "# 求不定积分\n", "print(p1.integ())" ] }, { "cell_type": "code", "execution_count": 97, "id": "0965b9e9-42fc-425c-90cb-d265e3f6e42f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 2\n", "1 x + 3 x + 2\n", "[-2. -1.]\n" ] } ], "source": [ "p3 = np.poly1d([1, 3, 2])\n", "print(p3)\n", "# 令多项式等于0，求解x\n", "print(p3.roots)" ] }, { "cell_type": "code", "execution_count": 98, "id": "c5bda2d2-0d61-46db-ac48-0f7ec3438660", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "numpy.poly1d" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(p3)" ] }, { "cell_type": "code", "execution_count": 99, "id": "a71c4799-183e-4096-9903-7edf9464feb7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.0 + 2.0·x + 0.0·x² + 3.0·x³\n" ] } ], "source": [ "from numpy.polynomial import Polynomial\n", "\n", "# NumPy新版本用Polynomial表示多项式\n", "p1 = Polynomial([1, 2, 0, 3])\n", "print(p1)" ] }, { "cell_type": "code", "execution_count": 100, "id": "0002c6e3-697b-425c-a17c-802e10f07981", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.0 + 0.0·x + 9.0·x²\n" ] } ], "source": [ "print(p1.deriv())" ] }, { "cell_type": "code", "execution_count": 101, "id": "1f9dde21-0854-4ae6-9703-9599a4204003", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.0 + 1.0·x + 1.0·x² + 0.0·x³ + 0.75·x⁴\n" ] } ], "source": [ "print(p1.integ())" ] }, { "cell_type": "code", "execution_count": 102, "id": "eb191b28-fb2c-460b-9c87-0041b128ba16", "metadata": {}, "outputs": 
[ { "data": { "text/plain": [ "3" ] }, "execution_count": 102, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 最高次项\n", "p1.degree()" ] }, { "cell_type": "markdown", "id": "2d6c3103-c5b0-413f-b2de-324b0394e3ae", "metadata": {}, "source": [ "### 最小二乘解" ] }, { "cell_type": "code", "execution_count": 103, "id": "7163cb46-44bb-487c-8dd2-7c530574ecc0", "metadata": {}, "outputs": [], "source": [ "# 每月收入\n", "x = np.array([3200, 4811, 5386, 5564, 6120, 6691, 6906, 7483, 7587, 7890,\n", " 8090, 8300, 8650, 8835, 8975, 9070, 9100, 9184, 9247, 9313, \n", " 9465, 9558, 9853, 9938, 10020, 10242, 10343, 10731, 10885, 10990, \n", " 11100, 11227, 11313, 11414, 11630, 11806, 11999, 12038, 12400, 12547, \n", " 12890, 13050, 13360, 13850, 14890, 14990, 15500, 16899, 17010, 19880])\n", "# 每月网购支出\n", "y = np.array([1761, 882, 1106, 182, 1532, 1978, 2174, 2117, 2134, 1924, \n", " 2207, 2876, 2617, 2683, 3054, 3277, 3345, 3462, 3401, 3591,\n", " 3596, 3671, 3829, 3907, 3852, 4288, 4359, 4099, 4300, 4367,\n", " 5019, 4873, 4674, 5174, 4666, 5797, 5782, 5451, 5487, 5448,\n", " 6002, 6439, 6309, 6045, 5935, 6928, 7356, 6682, 6672, 6582])" ] }, { "cell_type": "code", "execution_count": 104, "id": "a68a1d0d-0dc9-4437-bed9-6e9aff589df1", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# 定性分析 - 散点图\n", "plt.scatter(x, y)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 105, "id": "89030441-eefd-4a02-ad41-ba154dbf87b2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(ShapiroResult(statistic=0.9830237255502384, pvalue=0.6844559821829901),\n", " ShapiroResult(statistic=0.9789829625124067, pvalue=0.5099101868610301))" ] }, "execution_count": 105, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from scipy import stats\n", "\n", "# 夏皮洛检验（正态性判定）\n", "stats.shapiro(x), stats.shapiro(y)" ] }, { "cell_type": "code", "execution_count": 106, "id": "93573426-e1c0-4025-8b8f-2d83d69e103c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1. , 0.93422273],\n", " [0.93422273, 1. ]])" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 定量分析 - 相关系数 - correlation coefficient\n", "# 皮尔逊相关系数（标准化的协方差 - [-1, 1]）\n", "# 1. 连续值且成对出现\n", "# 2. 没有异常值\n", "# 3. 
来自于正态总体\n", "np.corrcoef(x, y)" ] }, { "cell_type": "code", "execution_count": 107, "id": "4b6e47de-8e3b-4be4-a5e4-f1d95649e6f9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PearsonRResult(statistic=0.9342227278473364, pvalue=3.9566343708624996e-23)" ] }, "execution_count": 107, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Compute the Pearson correlation coefficient\n", "stats.pearsonr(x, y)" ] }, { "cell_type": "code", "execution_count": 108, "id": "f8e53b1e-b2b1-4222-a69a-dcbca0041d0e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "50" ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "history_data = {key: value for key, value in zip(x, y)}\n", "len(history_data)" ] }, { "cell_type": "code", "execution_count": 109, "id": "1530478e-ec70-4f87-828f-1d6798c53c5e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[67, 88, 28, 95, 96, 10, 70, 80, 84, 8, 19, 68, 1, 90, 39]" ] }, "execution_count": 109, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = np.random.randint(1, 100, 15).tolist()\n", "data" ] }, { "cell_type": "code", "execution_count": 110, "id": "7d246563-86fd-418f-b058-4aae08d9b9c1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1, 8, 10]\n", "[96, 95, 90, 88, 84]\n" ] } ], "source": [ "import heapq\n", "\n", "# Use a heap to find the top-N elements quickly\n", "print(heapq.nsmallest(3, data))\n", "print(heapq.nlargest(5, data))" ] }, { "cell_type": "code", "execution_count": 111, "id": "866f9fd9-58aa-4c03-9415-99aa236958d4", "metadata": {}, "outputs": [], "source": [ "# Goal: monthly income and online spending are strongly correlated, so income can be used to predict spending.\n", "# Method 1: given an income, take the N records whose incomes are closest to it and predict with the mean of their spending.\n", "# KNN - k-nearest neighbours (find the k nearest neighbours and predict from their data).\n", "import heapq\n", "\n", "\n", "def predicate_by_knn(income, k=5):\n", "    \"\"\"Predict monthly online spending with k-nearest neighbours.\n", "\n", "    Picks the `k` incomes in the global `history_data` mapping\n", "    (income -> spending) that are closest to `income` and averages\n", "    the spending recorded for them.\n", "\n", "    :param income: monthly income to predict for\n", "    :param k: number of neighbours to average, must be at least 1\n", "    :return: mean spending of the neighbours, rounded to 2 decimals\n", "    :raises ValueError: if `k` is less than 1\n", "    \"\"\"\n", "    if k < 1:\n", "        # Without this guard np.mean([]) would quietly return nan.\n", "        raise ValueError('k must be at least 1')\n", "    # `known` avoids shadowing the notebook-level sample array `x`.\n", "    nearest = heapq.nsmallest(k, history_data, key=lambda known: (known - income) ** 2)\n", "    return np.mean([history_data[key] for key in nearest]).round(2)" ] }, { 
"cell_type": "code", "execution_count": 112, "id": "399b9b6a-6625-4dc2-a09c-c0699c538a46", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5937.0" ] }, "execution_count": 112, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicate_by_knn(12800)" ] }, { "cell_type": "code", "execution_count": 113, "id": "db0ee4bf-83f8-489f-a543-80854571da60", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1987.0" ] }, "execution_count": 113, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicate_by_knn(6800)" ] }, { "cell_type": "code", "execution_count": 114, "id": "6e514003-c143-4823-91eb-57f7608f6137", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6645.33" ] }, "execution_count": 114, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicate_by_knn(20000, k=3)" ] }, { "cell_type": "markdown", "id": "aa44d3ad-aa3f-44db-889a-8a69b9a9be65", "metadata": {}, "source": [ "回归模型：\n", "$$ Y = aX + b $$\n", "\n", "损失函数：\n", "$$ MSE = \\frac{1} {N} \\sum (\\hat{y_i} - y_i)^2 $$" ] }, { "cell_type": "code", "execution_count": 115, "id": "032eb740-ac9a-4526-9eb4-b6770907b07a", "metadata": {}, "outputs": [], "source": [ "# MSE - Mean Squared Error\n", "def get_loss(a, b):\n", "    \"\"\"Return the mean squared error of the line y = a * x + b.\n", "\n", "    The error is measured against the global sample arrays `x` and `y`.\n", "\n", "    :param a: slope of the candidate line\n", "    :param b: intercept of the candidate line\n", "    :return: mean of the squared residuals\n", "    \"\"\"\n", "    y_hat = a * x + b\n", "    return np.mean((y_hat - y) ** 2)" ] }, { "cell_type": "code", "execution_count": 116, "id": "0d56ccc4-5a0d-4b3a-89ab-01cf690c4fba", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1120507.2896234244\n", "808664.5200191085\n", "443441.57239664154\n", "408322.6807976254\n", "394598.0903141962\n", "394480.4982102699\n", "394457.8332210129\n", "393928.3764661801\n", "393882.835806886\n", "393829.0258808886\n", "393817.59355663764\n", "0.5068829877448287 -1209.315185532824\n" ] } ], "source": [ "# Monte Carlo simulation (random guessing): sample candidate lines and keep the best one.\n", "# No seed is set in this cell, so the printed values change from run to run.\n", "import random\n", "\n", "min_loss = np.inf\n", "ba, bb = None, None\n", "\n", "for _ in range(10000):\n", "    # Slope is drawn from [0.5, 1.0), intercept from [-2000, -1000).\n", "    a = random.random() * 0.5 + 0.5\n", 
"    b = random.random() * 1000 - 2000\n", "    curr_loss = get_loss(a, b)\n", "    if curr_loss < min_loss:\n", "        # Remember the best candidate so far and report each improvement.\n", "        min_loss = curr_loss\n", "        ba, bb = a, b\n", "        print(min_loss)\n", "print(ba, bb)" ] }, { "cell_type": "code", "execution_count": 117, "id": "af5f7d02-f921-4712-82e3-1ce43c86c708", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.scatter(x, y)\n", "plt.plot(x, ba * x + bb, color='r', linewidth=4)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 118, "id": "207c2da0-371f-40ea-b19f-f56cca8a7d30", "metadata": {}, "outputs": [], "source": [ "def predicate_by_regression(income):\n", " return round(ba * income + bb, 2)" ] }, { "cell_type": "code", "execution_count": 119, "id": "a9e3af32-654c-4704-9f32-cfe41f067d06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2237.49" ] }, "execution_count": 119, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicate_by_regression(6800)" ] }, { "cell_type": "code", "execution_count": 120, "id": "5133079a-59f8-4435-af1e-fb5bc13c457d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "5278.79" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicate_by_regression(12800)" ] }, { "cell_type": "markdown", "id": "200b9821-b8c8-41df-ae8e-d96bf5049415", "metadata": {}, "source": [ "将回归模型代入损失函数：\n", "$$ f(a, b) = \\frac {1} {N} \\sum_{i=1}^{N}(y_i - (ax_i + b))^2 $$\n", "\n", "如何让$f(a, b)$取到最小值？\n", "\n", "求偏导数，并令其等于0。\n", "$$ \\frac {\\partial {f(a, b)}} {\\partial {a}} = \\frac {2} {N} \\sum_{i=1}^{N}(-x_iy_i + x_i^2a + x_ib) = 0 $$ \n", "$$ \\frac {\\partial {f(a, b)}} {\\partial {b}} = \\frac {2} {N} \\sum_{i=1}^{N}(-y_i + x_ia + b) = 0 $$\n", "\n", "求解得到：\n", "$$a = \\frac{\\sum(x_{i} - \\bar{x})(y_{i} - \\bar{y})}{\\sum(x_{i} - \\bar{x})^{2}}$$\n", "$$b = \\bar{y} - a\\bar{x}$$" ] }, { "cell_type": "code", "execution_count": 121, "id": "9ab79db5-85c7-44ad-8ffa-2684fb499f7a", "metadata": {}, "outputs": [], "source": [ "x_bar, y_bar = np.mean(x), np.mean(y)" ] }, { "cell_type": "code", "execution_count": 122, "id": "332777b5-bc3f-44d2-bd54-f8e9a19497aa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.5079223873753402, -1227.104582703003)" ] }, "execution_count": 122, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "ba = np.dot((x - x_bar), (y - y_bar)) / np.sum((x - x_bar) ** 2)\n", "bb = y_bar - ba * x_bar\n", "ba, bb" ] }, { "cell_type": "code", "execution_count": 123, "id": "795f6a40-136a-4772-9e2b-5798022e408d", "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n" ], "text/plain": [ "

" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.scatter(x, y)\n", "plt.plot(x, a * x ** 2 + b * x + c, color='r', linewidth=4)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 127, "id": "3363f359-3615-4ffa-a040-41e39c83b38f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([-1.22710458e+03, 5.07922387e-01])" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Polynomial.fit(x, y, deg=1).convert().coef" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }