mirror of https://github.com/kubeflow/examples.git
2259 lines
90 KiB
Plaintext
2259 lines
90 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"source": [
|
||
"# Kaggle Getting Started Prediction Competition: Store Sales - Time Series Forecasting"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"source": [
|
||
"In this [competition](https://www.kaggle.com/competitions/store-sales-time-series-forecasting), we will use time-series forecasting to predict store sales using data from Corporación Favorita, a large Ecuador-based grocery retailer. This notebook builds on the hands-on exercises from the Kaggle Learn [Time Series course](https://www.kaggle.com/learn/time-series), where you learn to leverage periodic trends for forecasting and to combine different models, such as linear regression and XGBoost, to improve your forecasts. In this tutorial we focus on forecasting with periodic trends."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"tags": []
|
||
},
|
||
"source": [
|
||
"## Install necessary packages\n",
|
||
"\n",
|
||
"We can install the necessary packages either by running `pip install --user <package_name>` or by listing everything in a `requirements.txt` file and running `pip install --user -r requirements.txt`. We have put the dependencies in a `requirements.txt` file, so we will use the latter method.\n",
|
||
"\n",
|
||
"Restart the kernel after installation"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# !pip install --user -r requirements.txt"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
|
||
"import os\n",
|
||
"from sklearn.linear_model import LinearRegression\n",
|
||
"from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess\n",
|
||
"from sklearn.preprocessing import OneHotEncoder\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from sklearn.metrics import mean_absolute_error"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"path = 'data'\n",
|
||
"# path = os.getcwd()\n",
|
||
"\n",
|
||
"train_data_filepath = path + \"/train.csv\"\n",
|
||
"test_data_filepath = path + \"/test.csv\"\n",
|
||
"holidays_filepath = path + \"/holidays_events.csv\"\n",
|
||
"\n",
|
||
"# Read the csv files into dataframes\n",
|
||
"# Training data\n",
|
||
"train_sales = pd.read_csv(train_data_filepath,\n",
|
||
" usecols=['store_nbr', 'family', 'date', 'sales'],\n",
|
||
" dtype={\n",
|
||
" 'store_nbr': 'category',\n",
|
||
" 'family': 'category',\n",
|
||
" 'sales': 'float32',\n",
|
||
" },\n",
|
||
" parse_dates=['date'],\n",
|
||
" infer_datetime_format=True,\n",
|
||
")\n",
|
||
"train_sales['date'] = train_sales.date.dt.to_period('D')\n",
|
||
"train_sales = train_sales.set_index(['store_nbr', 'family', 'date']).sort_index()\n",
|
||
"\n",
|
||
"# Holiday features dataset\n",
|
||
"holidays_events = pd.read_csv(\n",
|
||
" holidays_filepath,\n",
|
||
" dtype={\n",
|
||
" 'type': 'category',\n",
|
||
" 'locale': 'category',\n",
|
||
" 'locale_name': 'category',\n",
|
||
" 'description': 'category',\n",
|
||
" 'transferred': 'bool',\n",
|
||
" },\n",
|
||
" parse_dates=['date'],\n",
|
||
" infer_datetime_format=True,\n",
|
||
")\n",
|
||
"holidays_events = holidays_events.set_index('date').to_period('D')\n",
|
||
"\n",
|
||
"\n",
|
||
"# Test data (its `id` column is required for submitting the forecast sales)\n",
|
||
"df_test = pd.read_csv(\n",
|
||
" test_data_filepath,\n",
|
||
" dtype={\n",
|
||
" 'store_nbr': 'category',\n",
|
||
" 'family': 'category',\n",
|
||
" 'onpromotion': 'uint32',\n",
|
||
" },\n",
|
||
" parse_dates=['date'],\n",
|
||
" infer_datetime_format=True,\n",
|
||
")\n",
|
||
"df_test['date'] = df_test.date.dt.to_period('D')\n",
|
||
"df_test = df_test.set_index(['store_nbr', 'family', 'date']).sort_index()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th>sales</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>store_nbr</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th rowspan=\"5\" valign=\"top\">1</th>\n",
|
||
" <th rowspan=\"5\" valign=\"top\">AUTOMOTIVE</th>\n",
|
||
" <th>2013-01-01</th>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2013-01-02</th>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2013-01-03</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2013-01-04</th>\n",
|
||
" <td>3.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2013-01-05</th>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" sales\n",
|
||
"store_nbr family date \n",
|
||
"1 AUTOMOTIVE 2013-01-01 0.0\n",
|
||
" 2013-01-02 2.0\n",
|
||
" 2013-01-03 3.0\n",
|
||
" 2013-01-04 3.0\n",
|
||
" 2013-01-05 5.0"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_sales.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>type</th>\n",
|
||
" <th>locale</th>\n",
|
||
" <th>locale_name</th>\n",
|
||
" <th>description</th>\n",
|
||
" <th>transferred</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2012-03-02</th>\n",
|
||
" <td>Holiday</td>\n",
|
||
" <td>Local</td>\n",
|
||
" <td>Manta</td>\n",
|
||
" <td>Fundacion de Manta</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2012-04-01</th>\n",
|
||
" <td>Holiday</td>\n",
|
||
" <td>Regional</td>\n",
|
||
" <td>Cotopaxi</td>\n",
|
||
" <td>Provincializacion de Cotopaxi</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2012-04-12</th>\n",
|
||
" <td>Holiday</td>\n",
|
||
" <td>Local</td>\n",
|
||
" <td>Cuenca</td>\n",
|
||
" <td>Fundacion de Cuenca</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2012-04-14</th>\n",
|
||
" <td>Holiday</td>\n",
|
||
" <td>Local</td>\n",
|
||
" <td>Libertad</td>\n",
|
||
" <td>Cantonizacion de Libertad</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2012-04-21</th>\n",
|
||
" <td>Holiday</td>\n",
|
||
" <td>Local</td>\n",
|
||
" <td>Riobamba</td>\n",
|
||
" <td>Cantonizacion de Riobamba</td>\n",
|
||
" <td>False</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" type locale locale_name description \\\n",
|
||
"date \n",
|
||
"2012-03-02 Holiday Local Manta Fundacion de Manta \n",
|
||
"2012-04-01 Holiday Regional Cotopaxi Provincializacion de Cotopaxi \n",
|
||
"2012-04-12 Holiday Local Cuenca Fundacion de Cuenca \n",
|
||
"2012-04-14 Holiday Local Libertad Cantonizacion de Libertad \n",
|
||
"2012-04-21 Holiday Local Riobamba Cantonizacion de Riobamba \n",
|
||
"\n",
|
||
" transferred \n",
|
||
"date \n",
|
||
"2012-03-02 False \n",
|
||
"2012-04-01 False \n",
|
||
"2012-04-12 False \n",
|
||
"2012-04-14 False \n",
|
||
"2012-04-21 False "
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"holidays_events.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>onpromotion</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>store_nbr</th>\n",
|
||
" <th>family</th>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th rowspan=\"5\" valign=\"top\">1</th>\n",
|
||
" <th rowspan=\"5\" valign=\"top\">AUTOMOTIVE</th>\n",
|
||
" <th>2017-08-16</th>\n",
|
||
" <td>3000888</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-17</th>\n",
|
||
" <td>3002670</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-18</th>\n",
|
||
" <td>3004452</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-19</th>\n",
|
||
" <td>3006234</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-20</th>\n",
|
||
" <td>3008016</td>\n",
|
||
" <td>0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" id onpromotion\n",
|
||
"store_nbr family date \n",
|
||
"1 AUTOMOTIVE 2017-08-16 3000888 0\n",
|
||
" 2017-08-17 3002670 0\n",
|
||
" 2017-08-18 3004452 0\n",
|
||
" 2017-08-19 3006234 0\n",
|
||
" 2017-08-20 3008016 0"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df_test.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# National and regional holidays of Ecuador in the training set\n",
|
||
"# Holiday features\n",
|
||
"holidays = (\n",
|
||
" holidays_events\n",
|
||
" .query(\"locale in ['National', 'Regional']\")\n",
|
||
" .loc['2017':'2017-08-15', ['description']]\n",
|
||
" .assign(description=lambda x: x.description.cat.remove_unused_categories())\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" description\n",
|
||
"date \n",
|
||
"2017-01-01 Primer dia del ano\n",
|
||
"2017-01-02 Traslado Primer dia del ano\n",
|
||
"2017-02-27 Carnaval\n",
|
||
"2017-02-28 Carnaval\n",
|
||
"2017-04-01 Provincializacion de Cotopaxi\n",
|
||
"2017-04-14 Viernes Santo\n",
|
||
"2017-05-01 Dia del Trabajo\n",
|
||
"2017-05-13 Dia de la Madre-1\n",
|
||
"2017-05-14 Dia de la Madre\n",
|
||
"2017-05-24 Batalla de Pichincha\n",
|
||
"2017-05-26 Traslado Batalla de Pichincha\n",
|
||
"2017-06-25 Provincializacion de Imbabura\n",
|
||
"2017-08-10 Primer Grito de Independencia\n",
|
||
"2017-08-11 Traslado Primer Grito de Independencia\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(holidays)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create the training target: 2017 sales with one column per (store_nbr, family)\n",
|
||
"y = train_sales.unstack(['store_nbr', 'family']).loc[\"2017\"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Using CalendarFourier to create fourier features \n",
|
||
"fourier = CalendarFourier(freq='M', order=4)\n",
|
||
"\n",
|
||
"# Using DeterministicProcess to create indicators for both \n",
|
||
"# weekly and monthly seasons\n",
|
||
"dp = DeterministicProcess(\n",
|
||
" index=y.index,\n",
|
||
" constant=True,\n",
|
||
" order=1,\n",
|
||
" seasonal=True, # weekly seasonality (indicators)\n",
|
||
" additional_terms=[fourier], # monthly seasonality (fourier)\n",
|
||
" drop=True,\n",
|
||
")\n",
|
||
"\n",
|
||
"# `in_sample` creates features for the dates given in the `index` argument\n",
|
||
"X = dp.in_sample()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>const</th>\n",
|
||
" <th>trend</th>\n",
|
||
" <th>s(2,7)</th>\n",
|
||
" <th>s(3,7)</th>\n",
|
||
" <th>s(4,7)</th>\n",
|
||
" <th>s(5,7)</th>\n",
|
||
" <th>s(6,7)</th>\n",
|
||
" <th>s(7,7)</th>\n",
|
||
" <th>sin(1,freq=M)</th>\n",
|
||
" <th>cos(1,freq=M)</th>\n",
|
||
" <th>sin(2,freq=M)</th>\n",
|
||
" <th>cos(2,freq=M)</th>\n",
|
||
" <th>sin(3,freq=M)</th>\n",
|
||
" <th>cos(3,freq=M)</th>\n",
|
||
" <th>sin(4,freq=M)</th>\n",
|
||
" <th>cos(4,freq=M)</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-01</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-02</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.201299</td>\n",
|
||
" <td>0.979530</td>\n",
|
||
" <td>0.394356</td>\n",
|
||
" <td>0.918958</td>\n",
|
||
" <td>0.571268</td>\n",
|
||
" <td>0.820763</td>\n",
|
||
" <td>0.724793</td>\n",
|
||
" <td>0.688967</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-03</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.394356</td>\n",
|
||
" <td>0.918958</td>\n",
|
||
" <td>0.724793</td>\n",
|
||
" <td>0.688967</td>\n",
|
||
" <td>0.937752</td>\n",
|
||
" <td>0.347305</td>\n",
|
||
" <td>0.998717</td>\n",
|
||
" <td>-0.050649</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-04</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.571268</td>\n",
|
||
" <td>0.820763</td>\n",
|
||
" <td>0.937752</td>\n",
|
||
" <td>0.347305</td>\n",
|
||
" <td>0.968077</td>\n",
|
||
" <td>-0.250653</td>\n",
|
||
" <td>0.651372</td>\n",
|
||
" <td>-0.758758</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-05</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.724793</td>\n",
|
||
" <td>0.688967</td>\n",
|
||
" <td>0.998717</td>\n",
|
||
" <td>-0.050649</td>\n",
|
||
" <td>0.651372</td>\n",
|
||
" <td>-0.758758</td>\n",
|
||
" <td>-0.101168</td>\n",
|
||
" <td>-0.994869</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" const trend s(2,7) s(3,7) s(4,7) s(5,7) s(6,7) s(7,7) \\\n",
|
||
"date \n",
|
||
"2017-01-01 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"2017-01-02 1.0 2.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"2017-01-03 1.0 3.0 0.0 1.0 0.0 0.0 0.0 0.0 \n",
|
||
"2017-01-04 1.0 4.0 0.0 0.0 1.0 0.0 0.0 0.0 \n",
|
||
"2017-01-05 1.0 5.0 0.0 0.0 0.0 1.0 0.0 0.0 \n",
|
||
"\n",
|
||
" sin(1,freq=M) cos(1,freq=M) sin(2,freq=M) cos(2,freq=M) \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.000000 1.000000 0.000000 1.000000 \n",
|
||
"2017-01-02 0.201299 0.979530 0.394356 0.918958 \n",
|
||
"2017-01-03 0.394356 0.918958 0.724793 0.688967 \n",
|
||
"2017-01-04 0.571268 0.820763 0.937752 0.347305 \n",
|
||
"2017-01-05 0.724793 0.688967 0.998717 -0.050649 \n",
|
||
"\n",
|
||
" sin(3,freq=M) cos(3,freq=M) sin(4,freq=M) cos(4,freq=M) \n",
|
||
"date \n",
|
||
"2017-01-01 0.000000 1.000000 0.000000 1.000000 \n",
|
||
"2017-01-02 0.571268 0.820763 0.724793 0.688967 \n",
|
||
"2017-01-03 0.937752 0.347305 0.998717 -0.050649 \n",
|
||
"2017-01-04 0.968077 -0.250653 0.651372 -0.758758 \n",
|
||
"2017-01-05 0.651372 -0.758758 -0.101168 -0.994869 "
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"ohe = OneHotEncoder(sparse=False)\n",
|
||
"\n",
|
||
"X_holidays = pd.DataFrame(\n",
|
||
" ohe.fit_transform(holidays),\n",
|
||
" index=holidays.index,\n",
|
||
" columns=holidays.description.unique(),\n",
|
||
")\n",
|
||
"\n",
|
||
"X_holidays = pd.get_dummies(holidays)\n",
|
||
"\n",
|
||
"# Join holiday features to training data\n",
|
||
"X_2= X.join(X_holidays, on='date').fillna(0.0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>const</th>\n",
|
||
" <th>trend</th>\n",
|
||
" <th>s(2,7)</th>\n",
|
||
" <th>s(3,7)</th>\n",
|
||
" <th>s(4,7)</th>\n",
|
||
" <th>s(5,7)</th>\n",
|
||
" <th>s(6,7)</th>\n",
|
||
" <th>s(7,7)</th>\n",
|
||
" <th>sin(1,freq=M)</th>\n",
|
||
" <th>cos(1,freq=M)</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>description_Dia de la Madre-1</th>\n",
|
||
" <th>description_Dia del Trabajo</th>\n",
|
||
" <th>description_Primer Grito de Independencia</th>\n",
|
||
" <th>description_Primer dia del ano</th>\n",
|
||
" <th>description_Provincializacion de Cotopaxi</th>\n",
|
||
" <th>description_Provincializacion de Imbabura</th>\n",
|
||
" <th>description_Traslado Batalla de Pichincha</th>\n",
|
||
" <th>description_Traslado Primer Grito de Independencia</th>\n",
|
||
" <th>description_Traslado Primer dia del ano</th>\n",
|
||
" <th>description_Viernes Santo</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-01</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.000000</td>\n",
|
||
" <td>1.000000</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-02</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.201299</td>\n",
|
||
" <td>0.979530</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-03</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.394356</td>\n",
|
||
" <td>0.918958</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-04</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.571268</td>\n",
|
||
" <td>0.820763</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-01-05</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.724793</td>\n",
|
||
" <td>0.688967</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 29 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" const trend s(2,7) s(3,7) s(4,7) s(5,7) s(6,7) s(7,7) \\\n",
|
||
"date \n",
|
||
"2017-01-01 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"2017-01-02 1.0 2.0 1.0 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"2017-01-03 1.0 3.0 0.0 1.0 0.0 0.0 0.0 0.0 \n",
|
||
"2017-01-04 1.0 4.0 0.0 0.0 1.0 0.0 0.0 0.0 \n",
|
||
"2017-01-05 1.0 5.0 0.0 0.0 0.0 1.0 0.0 0.0 \n",
|
||
"\n",
|
||
" sin(1,freq=M) cos(1,freq=M) ... description_Dia de la Madre-1 \\\n",
|
||
"date ... \n",
|
||
"2017-01-01 0.000000 1.000000 ... 0.0 \n",
|
||
"2017-01-02 0.201299 0.979530 ... 0.0 \n",
|
||
"2017-01-03 0.394356 0.918958 ... 0.0 \n",
|
||
"2017-01-04 0.571268 0.820763 ... 0.0 \n",
|
||
"2017-01-05 0.724793 0.688967 ... 0.0 \n",
|
||
"\n",
|
||
" description_Dia del Trabajo \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Primer Grito de Independencia \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Primer dia del ano \\\n",
|
||
"date \n",
|
||
"2017-01-01 1.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Provincializacion de Cotopaxi \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Provincializacion de Imbabura \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Traslado Batalla de Pichincha \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Traslado Primer Grito de Independencia \\\n",
|
||
"date \n",
|
||
"2017-01-01 0.0 \n",
|
||
"2017-01-02 0.0 \n",
|
||
"2017-01-03 0.0 \n",
|
||
"2017-01-04 0.0 \n",
|
||
"2017-01-05 0.0 \n",
|
||
"\n",
|
||
" description_Traslado Primer dia del ano description_Viernes Santo \n",
|
||
"date \n",
|
||
"2017-01-01 0.0 0.0 \n",
|
||
"2017-01-02 1.0 0.0 \n",
|
||
"2017-01-03 0.0 0.0 \n",
|
||
"2017-01-04 0.0 0.0 \n",
|
||
"2017-01-05 0.0 0.0 \n",
|
||
"\n",
|
||
"[5 rows x 29 columns]"
|
||
]
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_2.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"LinearRegression(fit_intercept=False)"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Split the data into training and validation sets (no shuffling, so time order is preserved)\n",
|
||
"# X_train, X_valid, y_train, y_valid = train_test_split(X_2, y, test_size=0.2, shuffle=False)\n",
|
||
"# X_train, X_valid, y_train, y_valid = train_test_split(X_2, y, test_size=0.35, shuffle=False)\n",
|
||
"X_train, X_valid, y_train, y_valid = train_test_split(X_2, y, test_size=0.1, shuffle=False)\n",
|
||
"\n",
|
||
"# Train the model\n",
|
||
"model = LinearRegression(fit_intercept=False)\n",
|
||
"model.fit(X_train, y_train)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
" sales \\\n",
|
||
"store_nbr 1 \n",
|
||
"family AUTOMOTIVE BABY CARE BEAUTY BEVERAGES BOOKS \n",
|
||
"date \n",
|
||
"2017-01-01 7.549517e-15 0.0 -7.993606e-15 2.955858e-12 -7.771561e-16 \n",
|
||
"2017-01-02 5.000000e+00 0.0 -7.105427e-15 1.434000e+03 -2.220446e-16 \n",
|
||
"2017-01-03 3.909703e+00 0.0 4.124157e+00 2.618392e+03 1.045599e+00 \n",
|
||
"2017-01-04 4.126214e+00 0.0 3.838988e+00 2.712982e+03 1.081908e+00 \n",
|
||
"2017-01-05 3.775828e+00 0.0 2.772568e+00 2.300269e+03 7.568947e-01 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"2017-07-19 3.517707e+00 0.0 3.105532e+00 2.468518e+03 3.441017e-01 \n",
|
||
"2017-07-20 3.647447e+00 0.0 2.439251e+00 2.119398e+03 1.701704e-02 \n",
|
||
"2017-07-21 5.893711e+00 0.0 2.909403e+00 2.486530e+03 1.248007e-01 \n",
|
||
"2017-07-22 5.273413e+00 0.0 4.062480e+00 2.551018e+03 -4.670959e-02 \n",
|
||
"2017-07-23 3.187038e+00 0.0 2.313208e+00 1.088383e+03 1.452946e-02 \n",
|
||
"\n",
|
||
" \\\n",
|
||
"store_nbr \n",
|
||
"family BREAD/BAKERY CELEBRATION CLEANING DAIRY \n",
|
||
"date \n",
|
||
"2017-01-01 1.989520e-12 -6.217249e-15 -9.094947e-13 -1.705303e-13 \n",
|
||
"2017-01-02 1.668190e+02 -2.486900e-14 3.320000e+02 3.760000e+02 \n",
|
||
"2017-01-03 4.490550e+02 1.517079e+01 8.942456e+02 8.789766e+02 \n",
|
||
"2017-01-04 5.081972e+02 1.504819e+01 9.702045e+02 9.755208e+02 \n",
|
||
"2017-01-05 4.394140e+02 1.738983e+01 7.793492e+02 8.050355e+02 \n",
|
||
"... ... ... ... ... \n",
|
||
"2017-07-19 4.216974e+02 1.598562e+01 8.426845e+02 8.368564e+02 \n",
|
||
"2017-07-20 3.624852e+02 1.922020e+01 6.701852e+02 6.855419e+02 \n",
|
||
"2017-07-21 3.826220e+02 2.257978e+01 7.639496e+02 8.014753e+02 \n",
|
||
"2017-07-22 3.873133e+02 1.527802e+01 6.465492e+02 7.748747e+02 \n",
|
||
"2017-07-23 1.277964e+02 5.455353e+00 2.535911e+02 3.199754e+02 \n",
|
||
"\n",
|
||
" ... \\\n",
|
||
"store_nbr ... 9 \n",
|
||
"family DELI ... MAGAZINES MEATS PERSONAL CARE \n",
|
||
"date ... \n",
|
||
"2017-01-01 1.278977e-13 ... -2.664535e-15 5.684342e-13 1.136868e-13 \n",
|
||
"2017-01-02 4.498000e+01 ... 5.000000e+00 6.595700e+02 1.243000e+03 \n",
|
||
"2017-01-03 1.619733e+02 ... 2.243120e+00 4.683032e+02 7.011916e+02 \n",
|
||
"2017-01-04 1.587053e+02 ... 2.111086e+00 4.272279e+02 6.251386e+02 \n",
|
||
"2017-01-05 1.299417e+02 ... 1.737285e+00 5.913621e+02 5.646615e+02 \n",
|
||
"... ... ... ... ... ... \n",
|
||
"2017-07-19 1.427022e+02 ... 3.127275e+00 3.114802e+02 3.201317e+02 \n",
|
||
"2017-07-20 1.171575e+02 ... 3.143815e+00 4.864077e+02 2.923701e+02 \n",
|
||
"2017-07-21 1.776076e+02 ... 3.411107e+00 2.600663e+02 2.449846e+02 \n",
|
||
"2017-07-22 1.394483e+02 ... 4.437929e+00 3.919628e+02 5.415536e+02 \n",
|
||
"2017-07-23 5.844992e+01 ... 5.086590e+00 4.210319e+02 6.054938e+02 \n",
|
||
"\n",
|
||
" \\\n",
|
||
"store_nbr \n",
|
||
"family PET SUPPLIES PLAYERS AND ELECTRONICS POULTRY PREPARED FOODS \n",
|
||
"date \n",
|
||
"2017-01-01 5.151435e-14 3.197442e-14 2.955858e-12 3.126388e-13 \n",
|
||
"2017-01-02 1.100000e+01 4.100000e+01 8.435960e+02 1.151890e+02 \n",
|
||
"2017-01-03 8.008500e+00 1.619647e+01 5.175689e+02 1.307421e+02 \n",
|
||
"2017-01-04 8.903250e+00 1.377409e+01 4.629252e+02 1.159961e+02 \n",
|
||
"2017-01-05 6.170211e+00 1.307464e+01 4.119202e+02 1.093756e+02 \n",
|
||
"... ... ... ... ... \n",
|
||
"2017-07-19 7.726077e+00 8.281865e+00 3.162381e+02 9.880941e+01 \n",
|
||
"2017-07-20 5.606643e+00 8.421032e+00 2.891628e+02 9.537463e+01 \n",
|
||
"2017-07-21 6.119672e+00 6.881565e+00 4.250789e+02 9.530784e+01 \n",
|
||
"2017-07-22 9.261406e+00 1.366985e+01 5.178388e+02 1.523055e+02 \n",
|
||
"2017-07-23 1.021215e+01 1.552290e+01 6.071180e+02 1.528399e+02 \n",
|
||
"\n",
|
||
" \n",
|
||
"store_nbr \n",
|
||
"family PRODUCE SCHOOL AND OFFICE SUPPLIES SEAFOOD \n",
|
||
"date \n",
|
||
"2017-01-01 1.091394e-11 -1.953993e-14 5.329071e-14 \n",
|
||
"2017-01-02 3.136896e+03 1.000000e+00 2.300000e+01 \n",
|
||
"2017-01-03 2.726351e+03 4.085220e+00 1.660380e+01 \n",
|
||
"2017-01-04 1.590723e+03 3.699744e+00 1.474686e+01 \n",
|
||
"2017-01-05 1.513519e+03 3.365985e+00 1.390525e+01 \n",
|
||
"... ... ... ... \n",
|
||
"2017-07-19 1.181186e+03 2.437601e+00 1.505896e+01 \n",
|
||
"2017-07-20 1.175080e+03 2.168726e+00 1.569676e+01 \n",
|
||
"2017-07-21 1.134143e+03 1.638062e+00 1.926372e+01 \n",
|
||
"2017-07-22 1.808542e+03 2.821081e+00 2.949308e+01 \n",
|
||
"2017-07-23 2.166858e+03 5.233486e+00 2.702881e+01 \n",
|
||
"\n",
|
||
"[204 rows x 1782 columns]\n",
|
||
" sales \\\n",
|
||
"store_nbr 1 \n",
|
||
"family AUTOMOTIVE BABY CARE BEAUTY BEVERAGES BOOKS BREAD/BAKERY \n",
|
||
"date \n",
|
||
"2017-07-24 5.727819 0.0 4.264784 2385.679666 0.055453 382.279625 \n",
|
||
"2017-07-25 5.412602 0.0 3.834436 2278.386019 0.396093 338.218895 \n",
|
||
"2017-07-26 5.546587 0.0 3.485700 2316.651964 0.377730 400.532214 \n",
|
||
"2017-07-27 5.059765 0.0 2.247602 1883.424889 -0.026401 338.205061 \n",
|
||
"2017-07-28 6.389507 0.0 2.270892 2202.311608 -0.017203 360.786261 \n",
|
||
"2017-07-29 4.868778 0.0 3.306115 2279.339161 -0.256233 373.695351 \n",
|
||
"2017-07-30 2.207400 0.0 1.836196 899.199888 -0.191291 127.094004 \n",
|
||
"2017-07-31 4.678854 0.0 4.352000 2336.187665 -0.066218 396.680590 \n",
|
||
"2017-08-01 4.778659 0.0 4.521779 2392.748205 0.412924 366.289136 \n",
|
||
"2017-08-02 5.600586 0.0 4.538701 2575.538361 0.538342 437.011446 \n",
|
||
"2017-08-03 5.764967 0.0 3.269540 2228.170975 0.234372 375.081209 \n",
|
||
"2017-08-04 7.432887 0.0 2.887853 2552.777991 0.273799 389.502433 \n",
|
||
"2017-08-05 5.806991 0.0 3.336573 2558.951795 0.001984 387.765613 \n",
|
||
"2017-08-06 2.657732 0.0 1.361883 1057.757948 0.005308 124.093812 \n",
|
||
"2017-08-07 4.471562 0.0 3.659669 2362.172056 0.082603 378.754515 \n",
|
||
"2017-08-08 4.009722 0.0 3.952340 2311.455630 0.557482 338.946077 \n",
|
||
"2017-08-09 4.571850 0.0 4.324633 2435.209443 0.725329 406.760191 \n",
|
||
"2017-08-10 4.847607 0.0 3.440642 2080.712674 0.487477 346.993408 \n",
|
||
"2017-08-11 6.917608 0.0 3.282854 2436.892083 0.579964 365.788596 \n",
|
||
"2017-08-12 5.800927 0.0 3.700406 2491.326727 0.314756 367.907618 \n",
|
||
"2017-08-13 3.060167 0.0 1.468358 1033.876882 0.261317 106.280688 \n",
|
||
"2017-08-14 5.025311 0.0 3.396951 2364.330867 0.222118 361.737573 \n",
|
||
"2017-08-15 4.399170 0.0 3.340403 2319.427404 0.540461 323.287381 \n",
|
||
"\n",
|
||
" ... \\\n",
|
||
"store_nbr ... 9 \n",
|
||
"family CELEBRATION CLEANING DAIRY DELI ... MAGAZINES \n",
|
||
"date ... \n",
|
||
"2017-07-24 17.352126 695.819143 751.488594 142.684933 ... 2.016965 \n",
|
||
"2017-07-25 18.100213 762.352414 714.239194 130.718884 ... 1.652873 \n",
|
||
"2017-07-26 18.563148 821.473226 813.105846 130.694267 ... 1.881480 \n",
|
||
"2017-07-27 21.163602 621.162165 648.149071 106.624841 ... 1.915397 \n",
|
||
"2017-07-28 23.503995 695.520162 756.795133 171.223902 ... 2.518552 \n",
|
||
"2017-07-29 15.333068 578.088065 735.824934 139.840805 ... 4.140482 \n",
|
||
"2017-07-30 5.213553 210.482026 302.500484 67.380162 ... 5.461631 \n",
|
||
"2017-07-31 17.491545 700.621100 768.404143 160.170868 ... 2.906301 \n",
|
||
"2017-08-01 19.034368 825.722365 769.041367 154.528914 ... 2.700340 \n",
|
||
"2017-08-02 20.202608 936.992738 896.839576 156.581835 ... 2.649688 \n",
|
||
"2017-08-03 22.935072 766.608317 741.289737 129.375025 ... 2.040779 \n",
|
||
"2017-08-04 24.624582 840.164127 835.775225 186.190972 ... 1.850603 \n",
|
||
"2017-08-05 15.207490 692.832834 781.521850 144.378437 ... 2.816441 \n",
|
||
"2017-08-06 3.758439 276.501237 306.946298 61.568925 ... 3.888926 \n",
|
||
"2017-08-07 15.226970 713.173576 736.787017 146.548447 ... 1.654441 \n",
|
||
"2017-08-08 16.898276 792.352445 716.336879 136.980459 ... 2.332718 \n",
|
||
"2017-08-09 19.177148 871.964553 840.943797 138.917704 ... 3.545681 \n",
|
||
"2017-08-10 23.656870 684.790070 696.007713 114.221427 ... 4.263701 \n",
|
||
"2017-08-11 27.145466 753.114674 806.741096 174.605093 ... 5.105466 \n",
|
||
"2017-08-12 18.989149 608.052140 766.360458 136.202311 ... 6.515294 \n",
|
||
"2017-08-13 7.873953 198.879423 298.781202 56.176754 ... 7.307837 \n",
|
||
"2017-08-14 18.662329 646.730713 728.755045 143.506630 ... 4.135282 \n",
|
||
"2017-08-15 18.831571 740.793918 704.750778 136.277563 ... 3.465774 \n",
|
||
"\n",
|
||
" \\\n",
|
||
"store_nbr \n",
|
||
"family MEATS PERSONAL CARE PET SUPPLIES PLAYERS AND ELECTRONICS \n",
|
||
"date \n",
|
||
"2017-07-24 330.568666 377.398894 6.252144 7.744248 \n",
|
||
"2017-07-25 298.701236 346.479212 6.199165 7.719281 \n",
|
||
"2017-07-26 265.901945 278.278835 7.731737 5.980156 \n",
|
||
"2017-07-27 444.601746 248.987257 5.833767 6.273378 \n",
|
||
"2017-07-28 228.244110 212.577697 6.462705 5.334357 \n",
|
||
"2017-07-29 380.901430 541.779536 9.729966 13.269033 \n",
|
||
"2017-07-30 443.873255 664.256731 10.979756 16.748006 \n",
|
||
"2017-07-31 397.241287 515.056248 7.589111 10.783432 \n",
|
||
"2017-08-01 409.543283 566.944181 8.305971 12.288100 \n",
|
||
"2017-08-02 408.042882 562.240553 10.555218 11.291611 \n",
|
||
"2017-08-03 593.612080 555.249893 8.973613 11.210290 \n",
|
||
"2017-08-04 354.976877 488.061458 9.239992 8.780966 \n",
|
||
"2017-08-05 460.746753 736.720670 11.408667 14.475567 \n",
|
||
"2017-08-06 464.352944 746.657917 11.049313 15.575780 \n",
|
||
"2017-08-07 360.715747 478.768603 5.977673 7.726295 \n",
|
||
"2017-08-08 330.297086 430.712287 5.422676 8.266013 \n",
|
||
"2017-08-09 305.742982 361.050153 7.141550 7.313108 \n",
|
||
"2017-08-10 486.594797 328.342387 5.834673 8.051455 \n",
|
||
"2017-08-11 256.307938 268.627021 6.978496 6.806118 \n",
|
||
"2017-08-12 377.976564 546.521895 10.272209 13.645332 \n",
|
||
"2017-08-13 401.664341 595.907553 10.935796 15.597060 \n",
|
||
"2017-08-14 321.062844 369.191684 6.563990 8.243415 \n",
|
||
"2017-08-15 315.709384 359.025564 6.331493 8.996785 \n",
|
||
"\n",
|
||
" \\\n",
|
||
"store_nbr \n",
|
||
"family POULTRY PREPARED FOODS PRODUCE SCHOOL AND OFFICE SUPPLIES \n",
|
||
"date \n",
|
||
"2017-07-24 373.164249 100.153197 1314.574351 3.182939 \n",
|
||
"2017-07-25 333.896679 100.806955 2122.806590 3.940399 \n",
|
||
"2017-07-26 286.221469 89.792485 973.362969 3.325020 \n",
|
||
"2017-07-27 253.651129 87.586691 937.627101 2.863491 \n",
|
||
"2017-07-28 389.402544 91.073857 917.422341 1.923888 \n",
|
||
"2017-07-29 495.824046 154.376611 1675.703587 2.619595 \n",
|
||
"2017-07-30 618.954101 163.202172 2176.106603 4.664124 \n",
|
||
"2017-07-31 438.080198 118.766267 1499.010604 2.547556 \n",
|
||
"2017-08-01 460.460030 124.952495 2475.305568 3.625184 \n",
|
||
"2017-08-02 465.021307 114.503613 1443.033302 3.651656 \n",
|
||
"2017-08-03 456.584476 107.127720 1440.498436 3.950330 \n",
|
||
"2017-08-04 576.844434 100.903821 1358.574828 3.619580 \n",
|
||
"2017-08-05 629.111427 152.750289 1975.442091 4.542771 \n",
|
||
"2017-08-06 672.921416 151.702501 2291.083170 6.335985 \n",
|
||
"2017-08-07 408.318903 101.557113 1431.206687 3.562271 \n",
|
||
"2017-08-08 362.564139 107.132489 2266.592644 3.786726 \n",
|
||
"2017-08-09 326.904124 100.323057 1158.479124 3.022802 \n",
|
||
"2017-08-10 307.961371 98.816429 1147.336219 2.800805 \n",
|
||
"2017-08-11 440.669215 98.483734 1108.395572 2.304686 \n",
|
||
"2017-08-12 517.987587 154.698306 1794.811738 3.369853 \n",
|
||
"2017-08-13 590.503456 155.965100 2181.836194 5.473258 \n",
|
||
"2017-08-14 353.263874 106.370655 1378.012093 3.017783 \n",
|
||
"2017-08-15 331.973466 111.348663 2247.437838 3.450708 \n",
|
||
"\n",
|
||
" \n",
|
||
"store_nbr \n",
|
||
"family SEAFOOD \n",
|
||
"date \n",
|
||
"2017-07-24 16.103552 \n",
|
||
"2017-07-25 15.812997 \n",
|
||
"2017-07-26 12.554808 \n",
|
||
"2017-07-27 11.862371 \n",
|
||
"2017-07-28 15.163625 \n",
|
||
"2017-07-29 26.534661 \n",
|
||
"2017-07-30 26.401286 \n",
|
||
"2017-07-31 18.334826 \n",
|
||
"2017-08-01 20.618136 \n",
|
||
"2017-08-02 18.989687 \n",
|
||
"2017-08-03 18.658489 \n",
|
||
"2017-08-04 21.075149 \n",
|
||
"2017-08-05 30.582249 \n",
|
||
"2017-08-06 27.994951 \n",
|
||
"2017-08-07 17.327000 \n",
|
||
"2017-08-08 17.335932 \n",
|
||
"2017-08-09 14.237254 \n",
|
||
"2017-08-10 13.624717 \n",
|
||
"2017-08-11 17.086724 \n",
|
||
"2017-08-12 28.720853 \n",
|
||
"2017-08-13 28.705497 \n",
|
||
"2017-08-14 20.204812 \n",
|
||
"2017-08-15 21.237452 \n",
|
||
"\n",
|
||
"[23 rows x 1782 columns]\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Get the model's predictions for the training and validation data\n",
|
||
"y_train_pred = pd.DataFrame(model.predict(X_train), index=X_train.index, columns=y.columns)\n",
|
||
"y_valid_pred = pd.DataFrame(model.predict(X_valid), index=X_valid.index, columns=y.columns)\n",
|
||
"print(y_train_pred)\n",
|
||
"# Show the validation predictions; the error metric (mean_absolute_error) is computed in the next cell\n",
|
||
"print(y_valid_pred)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"77.5594297705251\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"print(mean_absolute_error(y_valid, y_valid_pred))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Create features for test set\n",
|
||
"# \"out of sample\" refers to times outside of the observation period of the training data.\n",
|
||
"# We forecast the 16 days that follow the end of the training data\n",
|
||
"test = dp.out_of_sample(steps=16)\n",
|
||
"test.index.name = 'date'\n",
|
||
"X_test = test.join(X_holidays, on='date').fillna(0.0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>const</th>\n",
|
||
" <th>trend</th>\n",
|
||
" <th>s(2,7)</th>\n",
|
||
" <th>s(3,7)</th>\n",
|
||
" <th>s(4,7)</th>\n",
|
||
" <th>s(5,7)</th>\n",
|
||
" <th>s(6,7)</th>\n",
|
||
" <th>s(7,7)</th>\n",
|
||
" <th>sin(1,freq=M)</th>\n",
|
||
" <th>cos(1,freq=M)</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>description_Dia de la Madre-1</th>\n",
|
||
" <th>description_Dia del Trabajo</th>\n",
|
||
" <th>description_Primer Grito de Independencia</th>\n",
|
||
" <th>description_Primer dia del ano</th>\n",
|
||
" <th>description_Provincializacion de Cotopaxi</th>\n",
|
||
" <th>description_Provincializacion de Imbabura</th>\n",
|
||
" <th>description_Traslado Batalla de Pichincha</th>\n",
|
||
" <th>description_Traslado Primer Grito de Independencia</th>\n",
|
||
" <th>description_Traslado Primer dia del ano</th>\n",
|
||
" <th>description_Viernes Santo</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-16</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>228.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.101168</td>\n",
|
||
" <td>-0.994869</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-17</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>229.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>-0.101168</td>\n",
|
||
" <td>-0.994869</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-18</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>230.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>-0.299363</td>\n",
|
||
" <td>-0.954139</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-19</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>231.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>-0.485302</td>\n",
|
||
" <td>-0.874347</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-20</th>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>232.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>-0.651372</td>\n",
|
||
" <td>-0.758758</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>5 rows × 29 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" const trend s(2,7) s(3,7) s(4,7) s(5,7) s(6,7) s(7,7) \\\n",
|
||
"date \n",
|
||
"2017-08-16 1.0 228.0 0.0 0.0 1.0 0.0 0.0 0.0 \n",
|
||
"2017-08-17 1.0 229.0 0.0 0.0 0.0 1.0 0.0 0.0 \n",
|
||
"2017-08-18 1.0 230.0 0.0 0.0 0.0 0.0 1.0 0.0 \n",
|
||
"2017-08-19 1.0 231.0 0.0 0.0 0.0 0.0 0.0 1.0 \n",
|
||
"2017-08-20 1.0 232.0 0.0 0.0 0.0 0.0 0.0 0.0 \n",
|
||
"\n",
|
||
" sin(1,freq=M) cos(1,freq=M) ... description_Dia de la Madre-1 \\\n",
|
||
"date ... \n",
|
||
"2017-08-16 0.101168 -0.994869 ... 0.0 \n",
|
||
"2017-08-17 -0.101168 -0.994869 ... 0.0 \n",
|
||
"2017-08-18 -0.299363 -0.954139 ... 0.0 \n",
|
||
"2017-08-19 -0.485302 -0.874347 ... 0.0 \n",
|
||
"2017-08-20 -0.651372 -0.758758 ... 0.0 \n",
|
||
"\n",
|
||
" description_Dia del Trabajo \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Primer Grito de Independencia \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Primer dia del ano \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Provincializacion de Cotopaxi \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Provincializacion de Imbabura \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Traslado Batalla de Pichincha \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Traslado Primer Grito de Independencia \\\n",
|
||
"date \n",
|
||
"2017-08-16 0.0 \n",
|
||
"2017-08-17 0.0 \n",
|
||
"2017-08-18 0.0 \n",
|
||
"2017-08-19 0.0 \n",
|
||
"2017-08-20 0.0 \n",
|
||
"\n",
|
||
" description_Traslado Primer dia del ano description_Viernes Santo \n",
|
||
"date \n",
|
||
"2017-08-16 0.0 0.0 \n",
|
||
"2017-08-17 0.0 0.0 \n",
|
||
"2017-08-18 0.0 0.0 \n",
|
||
"2017-08-19 0.0 0.0 \n",
|
||
"2017-08-20 0.0 0.0 \n",
|
||
"\n",
|
||
"[5 rows x 29 columns]"
|
||
]
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"X_test.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead tr th {\n",
|
||
" text-align: left;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead tr:last-of-type th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr>\n",
|
||
" <th></th>\n",
|
||
" <th colspan=\"21\" halign=\"left\">sales</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>store_nbr</th>\n",
|
||
" <th colspan=\"10\" halign=\"left\">1</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th colspan=\"10\" halign=\"left\">9</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>family</th>\n",
|
||
" <th>AUTOMOTIVE</th>\n",
|
||
" <th>BABY CARE</th>\n",
|
||
" <th>BEAUTY</th>\n",
|
||
" <th>BEVERAGES</th>\n",
|
||
" <th>BOOKS</th>\n",
|
||
" <th>BREAD/BAKERY</th>\n",
|
||
" <th>CELEBRATION</th>\n",
|
||
" <th>CLEANING</th>\n",
|
||
" <th>DAIRY</th>\n",
|
||
" <th>DELI</th>\n",
|
||
" <th>...</th>\n",
|
||
" <th>MAGAZINES</th>\n",
|
||
" <th>MEATS</th>\n",
|
||
" <th>PERSONAL CARE</th>\n",
|
||
" <th>PET SUPPLIES</th>\n",
|
||
" <th>PLAYERS AND ELECTRONICS</th>\n",
|
||
" <th>POULTRY</th>\n",
|
||
" <th>PREPARED FOODS</th>\n",
|
||
" <th>PRODUCE</th>\n",
|
||
" <th>SCHOOL AND OFFICE SUPPLIES</th>\n",
|
||
" <th>SEAFOOD</th>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>date</th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" <th></th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-16</th>\n",
|
||
" <td>4.530377</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.476643</td>\n",
|
||
" <td>2435.125539</td>\n",
|
||
" <td>0.539017</td>\n",
|
||
" <td>394.844494</td>\n",
|
||
" <td>19.155803</td>\n",
|
||
" <td>837.879756</td>\n",
|
||
" <td>825.955214</td>\n",
|
||
" <td>140.726314</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.273131</td>\n",
|
||
" <td>315.692006</td>\n",
|
||
" <td>321.105247</td>\n",
|
||
" <td>8.063437</td>\n",
|
||
" <td>8.086183</td>\n",
|
||
" <td>317.477594</td>\n",
|
||
" <td>103.291276</td>\n",
|
||
" <td>1153.840125</td>\n",
|
||
" <td>2.742248</td>\n",
|
||
" <td>17.715609</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-17</th>\n",
|
||
" <td>4.243753</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2.506978</td>\n",
|
||
" <td>2069.480159</td>\n",
|
||
" <td>0.146672</td>\n",
|
||
" <td>341.867130</td>\n",
|
||
" <td>21.657030</td>\n",
|
||
" <td>668.495547</td>\n",
|
||
" <td>680.198257</td>\n",
|
||
" <td>118.434844</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.869787</td>\n",
|
||
" <td>515.972387</td>\n",
|
||
" <td>311.906195</td>\n",
|
||
" <td>6.568633</td>\n",
|
||
" <td>8.765447</td>\n",
|
||
" <td>315.186602</td>\n",
|
||
" <td>100.110891</td>\n",
|
||
" <td>1145.386003</td>\n",
|
||
" <td>2.450307</td>\n",
|
||
" <td>15.519307</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-18</th>\n",
|
||
" <td>5.801745</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2.413335</td>\n",
|
||
" <td>2421.619056</td>\n",
|
||
" <td>0.123446</td>\n",
|
||
" <td>369.494607</td>\n",
|
||
" <td>23.541928</td>\n",
|
||
" <td>752.379160</td>\n",
|
||
" <td>793.051857</td>\n",
|
||
" <td>180.492023</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.098471</td>\n",
|
||
" <td>295.035107</td>\n",
|
||
" <td>265.940697</td>\n",
|
||
" <td>7.409832</td>\n",
|
||
" <td>7.368460</td>\n",
|
||
" <td>457.824625</td>\n",
|
||
" <td>97.760310</td>\n",
|
||
" <td>1105.598207</td>\n",
|
||
" <td>1.829857</td>\n",
|
||
" <td>16.994486</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-19</th>\n",
|
||
" <td>4.404581</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.034082</td>\n",
|
||
" <td>2484.809788</td>\n",
|
||
" <td>-0.199899</td>\n",
|
||
" <td>380.191686</td>\n",
|
||
" <td>14.470116</td>\n",
|
||
" <td>619.148805</td>\n",
|
||
" <td>756.691792</td>\n",
|
||
" <td>142.406902</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.445279</td>\n",
|
||
" <td>413.255484</td>\n",
|
||
" <td>547.682569</td>\n",
|
||
" <td>10.337513</td>\n",
|
||
" <td>13.942843</td>\n",
|
||
" <td>536.649932</td>\n",
|
||
" <td>151.770182</td>\n",
|
||
" <td>1792.815317</td>\n",
|
||
" <td>2.798092</td>\n",
|
||
" <td>27.114291</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-20</th>\n",
|
||
" <td>1.746713</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.132583</td>\n",
|
||
" <td>1047.939563</td>\n",
|
||
" <td>-0.242533</td>\n",
|
||
" <td>124.285728</td>\n",
|
||
" <td>3.314559</td>\n",
|
||
" <td>218.241260</td>\n",
|
||
" <td>293.710673</td>\n",
|
||
" <td>61.056835</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>5.587107</td>\n",
|
||
" <td>422.096330</td>\n",
|
||
" <td>593.025760</td>\n",
|
||
" <td>10.615341</td>\n",
|
||
" <td>15.512749</td>\n",
|
||
" <td>602.836709</td>\n",
|
||
" <td>150.962957</td>\n",
|
||
" <td>2182.417892</td>\n",
|
||
" <td>4.899568</td>\n",
|
||
" <td>26.631198</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-21</th>\n",
|
||
" <td>4.197514</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.489445</td>\n",
|
||
" <td>2402.995314</td>\n",
|
||
" <td>-0.202355</td>\n",
|
||
" <td>380.765232</td>\n",
|
||
" <td>14.954285</td>\n",
|
||
" <td>671.859086</td>\n",
|
||
" <td>727.873511</td>\n",
|
||
" <td>145.608057</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.926209</td>\n",
|
||
" <td>320.496000</td>\n",
|
||
" <td>358.175192</td>\n",
|
||
" <td>5.904555</td>\n",
|
||
" <td>7.695611</td>\n",
|
||
" <td>354.743218</td>\n",
|
||
" <td>99.771705</td>\n",
|
||
" <td>1378.102781</td>\n",
|
||
" <td>2.575910</td>\n",
|
||
" <td>18.710922</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-22</th>\n",
|
||
" <td>4.380181</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.885605</td>\n",
|
||
" <td>2373.957701</td>\n",
|
||
" <td>0.247319</td>\n",
|
||
" <td>338.210541</td>\n",
|
||
" <td>16.676904</td>\n",
|
||
" <td>769.524478</td>\n",
|
||
" <td>706.798103</td>\n",
|
||
" <td>134.702782</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.673161</td>\n",
|
||
" <td>294.686378</td>\n",
|
||
" <td>339.686169</td>\n",
|
||
" <td>5.467773</td>\n",
|
||
" <td>7.964100</td>\n",
|
||
" <td>322.597153</td>\n",
|
||
" <td>103.774080</td>\n",
|
||
" <td>2236.205025</td>\n",
|
||
" <td>3.275714</td>\n",
|
||
" <td>20.822769</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-23</th>\n",
|
||
" <td>5.449226</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4.377596</td>\n",
|
||
" <td>2482.942320</td>\n",
|
||
" <td>0.395875</td>\n",
|
||
" <td>401.550486</td>\n",
|
||
" <td>18.868402</td>\n",
|
||
" <td>866.401057</td>\n",
|
||
" <td>828.422416</td>\n",
|
||
" <td>135.273727</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.636684</td>\n",
|
||
" <td>279.782068</td>\n",
|
||
" <td>295.311729</td>\n",
|
||
" <td>7.200184</td>\n",
|
||
" <td>6.601095</td>\n",
|
||
" <td>300.432556</td>\n",
|
||
" <td>95.190160</td>\n",
|
||
" <td>1113.452235</td>\n",
|
||
" <td>2.927540</td>\n",
|
||
" <td>18.084607</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-24</th>\n",
|
||
" <td>5.966248</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.532936</td>\n",
|
||
" <td>2078.429559</td>\n",
|
||
" <td>0.129517</td>\n",
|
||
" <td>338.248806</td>\n",
|
||
" <td>23.048885</td>\n",
|
||
" <td>689.752267</td>\n",
|
||
" <td>678.907893</td>\n",
|
||
" <td>109.559662</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.113239</td>\n",
|
||
" <td>472.197635</td>\n",
|
||
" <td>281.027237</td>\n",
|
||
" <td>5.913435</td>\n",
|
||
" <td>6.892363</td>\n",
|
||
" <td>293.766398</td>\n",
|
||
" <td>91.596539</td>\n",
|
||
" <td>1056.203888</td>\n",
|
||
" <td>3.005488</td>\n",
|
||
" <td>15.767767</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-25</th>\n",
|
||
" <td>7.942797</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.253891</td>\n",
|
||
" <td>2359.660562</td>\n",
|
||
" <td>0.170662</td>\n",
|
||
" <td>355.829919</td>\n",
|
||
" <td>25.967477</td>\n",
|
||
" <td>756.719186</td>\n",
|
||
" <td>782.699559</td>\n",
|
||
" <td>169.236486</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.071389</td>\n",
|
||
" <td>248.780778</td>\n",
|
||
" <td>229.683957</td>\n",
|
||
" <td>7.087813</td>\n",
|
||
" <td>5.200893</td>\n",
|
||
" <td>432.854896</td>\n",
|
||
" <td>88.810096</td>\n",
|
||
" <td>955.003938</td>\n",
|
||
" <td>2.653712</td>\n",
|
||
" <td>16.124829</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-26</th>\n",
|
||
" <td>6.433461</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.414250</td>\n",
|
||
" <td>2332.943993</td>\n",
|
||
" <td>-0.166271</td>\n",
|
||
" <td>359.026534</td>\n",
|
||
" <td>17.047643</td>\n",
|
||
" <td>597.937552</td>\n",
|
||
" <td>732.941236</td>\n",
|
||
" <td>130.398960</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.199484</td>\n",
|
||
" <td>367.677208</td>\n",
|
||
" <td>505.829741</td>\n",
|
||
" <td>10.343172</td>\n",
|
||
" <td>11.641134</td>\n",
|
||
" <td>506.633259</td>\n",
|
||
" <td>142.753254</td>\n",
|
||
" <td>1584.992754</td>\n",
|
||
" <td>3.685511</td>\n",
|
||
" <td>24.610141</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-27</th>\n",
|
||
" <td>3.159030</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>0.940934</td>\n",
|
||
" <td>811.966181</td>\n",
|
||
" <td>-0.285951</td>\n",
|
||
" <td>100.005596</td>\n",
|
||
" <td>5.257965</td>\n",
|
||
" <td>169.218185</td>\n",
|
||
" <td>256.317846</td>\n",
|
||
" <td>50.524195</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>4.358689</td>\n",
|
||
" <td>380.290332</td>\n",
|
||
" <td>549.642942</td>\n",
|
||
" <td>10.842465</td>\n",
|
||
" <td>13.365095</td>\n",
|
||
" <td>567.325041</td>\n",
|
||
" <td>143.175022</td>\n",
|
||
" <td>1944.965118</td>\n",
|
||
" <td>5.594332</td>\n",
|
||
" <td>22.796808</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-28</th>\n",
|
||
" <td>4.693311</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2.850934</td>\n",
|
||
" <td>2118.776864</td>\n",
|
||
" <td>-0.344358</td>\n",
|
||
" <td>358.929538</td>\n",
|
||
" <td>15.878498</td>\n",
|
||
" <td>603.429651</td>\n",
|
||
" <td>683.193370</td>\n",
|
||
" <td>139.224362</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.033654</td>\n",
|
||
" <td>288.673828</td>\n",
|
||
" <td>325.768281</td>\n",
|
||
" <td>6.247588</td>\n",
|
||
" <td>6.148403</td>\n",
|
||
" <td>319.066872</td>\n",
|
||
" <td>95.537720</td>\n",
|
||
" <td>1161.381639</td>\n",
|
||
" <td>2.861736</td>\n",
|
||
" <td>14.610827</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-29</th>\n",
|
||
" <td>3.975547</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.129241</td>\n",
|
||
" <td>2102.279061</td>\n",
|
||
" <td>0.037795</td>\n",
|
||
" <td>324.592586</td>\n",
|
||
" <td>16.731948</td>\n",
|
||
" <td>701.063303</td>\n",
|
||
" <td>667.748359</td>\n",
|
||
" <td>135.095252</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>2.375713</td>\n",
|
||
" <td>283.625018</td>\n",
|
||
" <td>339.912089</td>\n",
|
||
" <td>5.936333</td>\n",
|
||
" <td>7.563280</td>\n",
|
||
" <td>300.582429</td>\n",
|
||
" <td>105.845232</td>\n",
|
||
" <td>2103.366931</td>\n",
|
||
" <td>3.074228</td>\n",
|
||
" <td>17.864347</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-30</th>\n",
|
||
" <td>4.469587</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.900584</td>\n",
|
||
" <td>2293.759485</td>\n",
|
||
" <td>0.190055</td>\n",
|
||
" <td>400.848069</td>\n",
|
||
" <td>18.626602</td>\n",
|
||
" <td>823.291951</td>\n",
|
||
" <td>810.947465</td>\n",
|
||
" <td>144.203969</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.011725</td>\n",
|
||
" <td>302.623420</td>\n",
|
||
" <td>354.074686</td>\n",
|
||
" <td>7.967787</td>\n",
|
||
" <td>7.826197</td>\n",
|
||
" <td>312.268704</td>\n",
|
||
" <td>105.552481</td>\n",
|
||
" <td>1122.700845</td>\n",
|
||
" <td>2.358178</td>\n",
|
||
" <td>17.457082</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2017-08-31</th>\n",
|
||
" <td>4.917283</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.620152</td>\n",
|
||
" <td>2028.937558</td>\n",
|
||
" <td>0.007847</td>\n",
|
||
" <td>352.649771</td>\n",
|
||
" <td>23.188303</td>\n",
|
||
" <td>694.554224</td>\n",
|
||
" <td>695.823441</td>\n",
|
||
" <td>127.045596</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>3.002575</td>\n",
|
||
" <td>538.870256</td>\n",
|
||
" <td>418.684592</td>\n",
|
||
" <td>7.250403</td>\n",
|
||
" <td>9.931547</td>\n",
|
||
" <td>358.682347</td>\n",
|
||
" <td>110.209609</td>\n",
|
||
" <td>1240.640141</td>\n",
|
||
" <td>2.370105</td>\n",
|
||
" <td>17.999040</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>16 rows × 1782 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" sales \\\n",
|
||
"store_nbr 1 \n",
|
||
"family AUTOMOTIVE BABY CARE BEAUTY BEVERAGES BOOKS BREAD/BAKERY \n",
|
||
"date \n",
|
||
"2017-08-16 4.530377 0.0 3.476643 2435.125539 0.539017 394.844494 \n",
|
||
"2017-08-17 4.243753 0.0 2.506978 2069.480159 0.146672 341.867130 \n",
|
||
"2017-08-18 5.801745 0.0 2.413335 2421.619056 0.123446 369.494607 \n",
|
||
"2017-08-19 4.404581 0.0 3.034082 2484.809788 -0.199899 380.191686 \n",
|
||
"2017-08-20 1.746713 0.0 1.132583 1047.939563 -0.242533 124.285728 \n",
|
||
"2017-08-21 4.197514 0.0 3.489445 2402.995314 -0.202355 380.765232 \n",
|
||
"2017-08-22 4.380181 0.0 3.885605 2373.957701 0.247319 338.210541 \n",
|
||
"2017-08-23 5.449226 0.0 4.377596 2482.942320 0.395875 401.550486 \n",
|
||
"2017-08-24 5.966248 0.0 3.532936 2078.429559 0.129517 338.248806 \n",
|
||
"2017-08-25 7.942797 0.0 3.253891 2359.660562 0.170662 355.829919 \n",
|
||
"2017-08-26 6.433461 0.0 3.414250 2332.943993 -0.166271 359.026534 \n",
|
||
"2017-08-27 3.159030 0.0 0.940934 811.966181 -0.285951 100.005596 \n",
|
||
"2017-08-28 4.693311 0.0 2.850934 2118.776864 -0.344358 358.929538 \n",
|
||
"2017-08-29 3.975547 0.0 3.129241 2102.279061 0.037795 324.592586 \n",
|
||
"2017-08-30 4.469587 0.0 3.900584 2293.759485 0.190055 400.848069 \n",
|
||
"2017-08-31 4.917283 0.0 3.620152 2028.937558 0.007847 352.649771 \n",
|
||
"\n",
|
||
" ... \\\n",
|
||
"store_nbr ... 9 \n",
|
||
"family CELEBRATION CLEANING DAIRY DELI ... MAGAZINES \n",
|
||
"date ... \n",
|
||
"2017-08-16 19.155803 837.879756 825.955214 140.726314 ... 3.273131 \n",
|
||
"2017-08-17 21.657030 668.495547 680.198257 118.434844 ... 2.869787 \n",
|
||
"2017-08-18 23.541928 752.379160 793.051857 180.492023 ... 3.098471 \n",
|
||
"2017-08-19 14.470116 619.148805 756.691792 142.406902 ... 4.445279 \n",
|
||
"2017-08-20 3.314559 218.241260 293.710673 61.056835 ... 5.587107 \n",
|
||
"2017-08-21 14.954285 671.859086 727.873511 145.608057 ... 2.926209 \n",
|
||
"2017-08-22 16.676904 769.524478 706.798103 134.702782 ... 2.673161 \n",
|
||
"2017-08-23 18.868402 866.401057 828.422416 135.273727 ... 2.636684 \n",
|
||
"2017-08-24 23.048885 689.752267 678.907893 109.559662 ... 2.113239 \n",
|
||
"2017-08-25 25.967477 756.719186 782.699559 169.236486 ... 2.071389 \n",
|
||
"2017-08-26 17.047643 597.937552 732.941236 130.398960 ... 3.199484 \n",
|
||
"2017-08-27 5.257965 169.218185 256.317846 50.524195 ... 4.358689 \n",
|
||
"2017-08-28 15.878498 603.429651 683.193370 139.224362 ... 2.033654 \n",
|
||
"2017-08-29 16.731948 701.063303 667.748359 135.095252 ... 2.375713 \n",
|
||
"2017-08-30 18.626602 823.291951 810.947465 144.203969 ... 3.011725 \n",
|
||
"2017-08-31 23.188303 694.554224 695.823441 127.045596 ... 3.002575 \n",
|
||
"\n",
|
||
" \\\n",
|
||
"store_nbr \n",
|
||
"family MEATS PERSONAL CARE PET SUPPLIES PLAYERS AND ELECTRONICS \n",
|
||
"date \n",
|
||
"2017-08-16 315.692006 321.105247 8.063437 8.086183 \n",
|
||
"2017-08-17 515.972387 311.906195 6.568633 8.765447 \n",
|
||
"2017-08-18 295.035107 265.940697 7.409832 7.368460 \n",
|
||
"2017-08-19 413.255484 547.682569 10.337513 13.942843 \n",
|
||
"2017-08-20 422.096330 593.025760 10.615341 15.512749 \n",
|
||
"2017-08-21 320.496000 358.175192 5.904555 7.695611 \n",
|
||
"2017-08-22 294.686378 339.686169 5.467773 7.964100 \n",
|
||
"2017-08-23 279.782068 295.311729 7.200184 6.601095 \n",
|
||
"2017-08-24 472.197635 281.027237 5.913435 6.892363 \n",
|
||
"2017-08-25 248.780778 229.683957 7.087813 5.200893 \n",
|
||
"2017-08-26 367.677208 505.829741 10.343172 11.641134 \n",
|
||
"2017-08-27 380.290332 549.642942 10.842465 13.365095 \n",
|
||
"2017-08-28 288.673828 325.768281 6.247588 6.148403 \n",
|
||
"2017-08-29 283.625018 339.912089 5.936333 7.563280 \n",
|
||
"2017-08-30 302.623420 354.074686 7.967787 7.826197 \n",
|
||
"2017-08-31 538.870256 418.684592 7.250403 9.931547 \n",
|
||
"\n",
|
||
" \\\n",
|
||
"store_nbr \n",
|
||
"family POULTRY PREPARED FOODS PRODUCE SCHOOL AND OFFICE SUPPLIES \n",
|
||
"date \n",
|
||
"2017-08-16 317.477594 103.291276 1153.840125 2.742248 \n",
|
||
"2017-08-17 315.186602 100.110891 1145.386003 2.450307 \n",
|
||
"2017-08-18 457.824625 97.760310 1105.598207 1.829857 \n",
|
||
"2017-08-19 536.649932 151.770182 1792.815317 2.798092 \n",
|
||
"2017-08-20 602.836709 150.962957 2182.417892 4.899568 \n",
|
||
"2017-08-21 354.743218 99.771705 1378.102781 2.575910 \n",
|
||
"2017-08-22 322.597153 103.774080 2236.205025 3.275714 \n",
|
||
"2017-08-23 300.432556 95.190160 1113.452235 2.927540 \n",
|
||
"2017-08-24 293.766398 91.596539 1056.203888 3.005488 \n",
|
||
"2017-08-25 432.854896 88.810096 955.003938 2.653712 \n",
|
||
"2017-08-26 506.633259 142.753254 1584.992754 3.685511 \n",
|
||
"2017-08-27 567.325041 143.175022 1944.965118 5.594332 \n",
|
||
"2017-08-28 319.066872 95.537720 1161.381639 2.861736 \n",
|
||
"2017-08-29 300.582429 105.845232 2103.366931 3.074228 \n",
|
||
"2017-08-30 312.268704 105.552481 1122.700845 2.358178 \n",
|
||
"2017-08-31 358.682347 110.209609 1240.640141 2.370105 \n",
|
||
"\n",
|
||
" \n",
|
||
"store_nbr \n",
|
||
"family SEAFOOD \n",
|
||
"date \n",
|
||
"2017-08-16 17.715609 \n",
|
||
"2017-08-17 15.519307 \n",
|
||
"2017-08-18 16.994486 \n",
|
||
"2017-08-19 27.114291 \n",
|
||
"2017-08-20 26.631198 \n",
|
||
"2017-08-21 18.710922 \n",
|
||
"2017-08-22 20.822769 \n",
|
||
"2017-08-23 18.084607 \n",
|
||
"2017-08-24 15.767767 \n",
|
||
"2017-08-25 16.124829 \n",
|
||
"2017-08-26 24.610141 \n",
|
||
"2017-08-27 22.796808 \n",
|
||
"2017-08-28 14.610827 \n",
|
||
"2017-08-29 17.864347 \n",
|
||
"2017-08-30 17.457082 \n",
|
||
"2017-08-31 17.999040 \n",
|
||
"\n",
|
||
"[16 rows x 1782 columns]"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"y_forecast = pd.DataFrame(model.predict(X_test), index=X_test.index, columns=y.columns)\n",
|
||
"y_forecast"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_submit = y_forecast.stack(['store_nbr', 'family'])\n",
|
||
"y_submit = y_submit.join(df_test.id).reindex(columns=['id', 'sales'])\n",
|
||
"y_submit.to_csv('submission.csv', index=False)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"kubeflow_notebook": {
|
||
"autosnapshot": true,
|
||
"experiment": {
|
||
"id": "",
|
||
"name": ""
|
||
},
|
||
"experiment_name": "",
|
||
"katib_metadata": {
|
||
"algorithm": {
|
||
"algorithmName": "grid"
|
||
},
|
||
"maxFailedTrialCount": 3,
|
||
"maxTrialCount": 12,
|
||
"objective": {
|
||
"objectiveMetricName": "",
|
||
"type": "minimize"
|
||
},
|
||
"parallelTrialCount": 3,
|
||
"parameters": []
|
||
},
|
||
"katib_run": false,
|
||
"pipeline_description": "",
|
||
"pipeline_name": "",
|
||
"snapshot_volumes": true,
|
||
"steps_defaults": [
|
||
"label:access-ml-pipeline:true",
|
||
"label:access-rok:true"
|
||
],
|
||
"volume_access_mode": "rwm",
|
||
"volumes": [
|
||
{
|
||
"annotations": [],
|
||
"mount_point": "/home/jovyan",
|
||
"name": "store-sales-workspace-tx8b7",
|
||
"size": 20,
|
||
"size_type": "Gi",
|
||
"snapshot": false,
|
||
"type": "clone"
|
||
}
|
||
]
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.9"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|