In [ ]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras import layers, models
import seaborn as sns
import pandas as pd
In [ ]:
# Load the 'flights' dataset from seaborn
flights = sns.load_dataset('flights')
In [ ]:
flights
Out[ ]:
|     | year | month | passengers |
|-----|------|-------|------------|
| 0   | 1949 | Jan   | 112 |
| 1   | 1949 | Feb   | 118 |
| 2   | 1949 | Mar   | 132 |
| 3   | 1949 | Apr   | 129 |
| 4   | 1949 | May   | 121 |
| ... | ...  | ...   | ... |
| 139 | 1960 | Aug   | 606 |
| 140 | 1960 | Sep   | 508 |
| 141 | 1960 | Oct   | 461 |
| 142 | 1960 | Nov   | 390 |
| 143 | 1960 | Dec   | 432 |

144 rows × 3 columns
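In [ ]:
# (Optional exploration, not in the original analysis) Pivoting the table to a
# year x month grid makes the seasonal pattern easy to eyeball before modelling;
# this only reshapes the DataFrame loaded above.
flights_pivot = flights.pivot(index='year', columns='month', values='passengers')
print(flights_pivot.head())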
In [ ]:
# Prepare the time series data
time_series_data = flights['passengers'].values
time_steps = 12 # Using a window of 12 months (1 year)
X, y = [], []
for i in range(len(time_series_data) - time_steps):
    X.append(time_series_data[i:i + time_steps])
    y.append(time_series_data[i + time_steps])
X = np.array(X)
y = np.array(y)
# Reshape X for 1D CNN
X = X.reshape(-1, time_steps, 1)
print(time_series_data[:15])
print(X[0,:,0])
print(X[1,:,0])
print(y[:2])
print(X.shape)
print(y.shape)
[112 118 132 129 121 135 148 148 136 119 104 118 115 126 141]
[112 118 132 129 121 135 148 148 136 119 104 118]
[118 132 129 121 135 148 148 136 119 104 118 115]
[115 126]
(132, 12, 1)
(132,)
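In [ ]:
# (Optional sketch, not part of the original run) The raw counts span roughly
# 100-600 passengers, which is why the MSE values during training below are so
# large. Min-max scaling the series before windowing usually speeds up and
# stabilises convergence; the scaled arrays here are an alternative, unused input.
series_min, series_max = time_series_data.min(), time_series_data.max()
scaled_series = (time_series_data - series_min) / (series_max - series_min)
X_scaled = np.array([scaled_series[i:i + time_steps]
                     for i in range(len(scaled_series) - time_steps)]).reshape(-1, time_steps, 1)
y_scaled = np.array([scaled_series[i + time_steps]
                     for i in range(len(scaled_series) - time_steps)])
# Predictions from a model trained on scaled data would be mapped back with:
# predictions * (series_max - series_min) + series_min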
In [ ]:
# Plot the full time series
t = np.arange(len(time_series_data))
plt.figure(figsize=(12, 3))
plt.plot(t, time_series_data, label='Airline Passengers')
plt.xlabel('Time (Months)')
plt.ylabel('Number of Passengers')
plt.title('Airline Passengers Over Time')
plt.legend()
plt.show()
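In [ ]:
# (Optional sketch, not in the original notebook) Overlaying a 12-month rolling
# mean separates the upward trend from the seasonal swings; it reuses the
# 'passengers' column and the time axis defined above.
rolling_mean = flights['passengers'].rolling(window=12).mean()
plt.figure(figsize=(12, 3))
plt.plot(t, time_series_data, label='Airline Passengers', alpha=0.5)
plt.plot(t, rolling_mean, label='12-Month Rolling Mean')
plt.xlabel('Time (Months)')
plt.ylabel('Number of Passengers')
plt.legend()
plt.show()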
In [ ]:
# Split the data into training and test sets
# Note: train_test_split shuffles by default, so windows from different points in
# time land in both sets. For a stricter forecasting evaluation, a chronological
# split is usually preferred (a sketch of that variant follows the training output).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the 1D CNN model
model = models.Sequential([
    layers.Input(shape=(time_steps, 1)),
    layers.Conv1D(32, kernel_size=3, activation='relu'),
    layers.MaxPooling1D(pool_size=2),
    layers.Conv1D(64, kernel_size=3, activation='relu'),
    layers.GlobalAveragePooling1D(),
    layers.Dense(50, activation='relu'),
    layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=25, validation_split=0.1)
Epoch 1/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 2s 139ms/step - loss: 81642.4531 - val_loss: 78684.9453
Epoch 2/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 35ms/step - loss: 75499.3281 - val_loss: 65682.4219
Epoch 3/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - loss: 59268.0625 - val_loss: 53558.5273
Epoch 4/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step - loss: 47788.9883 - val_loss: 40289.0430
Epoch 5/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 37886.7148 - val_loss: 27205.5801
Epoch 6/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step - loss: 25103.9746 - val_loss: 15658.7041
Epoch 7/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step - loss: 16134.2773 - val_loss: 7122.2417
Epoch 8/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 41ms/step - loss: 8428.2207 - val_loss: 3933.6521
Epoch 9/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 27ms/step - loss: 3732.4946 - val_loss: 6675.1333
Epoch 10/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - loss: 4305.7979 - val_loss: 11658.7129
Epoch 11/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - loss: 6776.3887 - val_loss: 13384.7041
Epoch 12/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - loss: 7420.2119 - val_loss: 10881.7100
Epoch 13/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - loss: 5401.3750 - val_loss: 7423.1079
Epoch 14/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - loss: 3812.3354 - val_loss: 5009.0483
Epoch 15/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 23ms/step - loss: 3596.3770 - val_loss: 4057.4211
Epoch 16/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 4162.7432 - val_loss: 3884.2405
Epoch 17/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - loss: 3952.0967 - val_loss: 3868.0859
Epoch 18/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 26ms/step - loss: 4106.3691 - val_loss: 3896.0859
Epoch 19/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step - loss: 3715.5227 - val_loss: 4101.0923
Epoch 20/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - loss: 3684.1990 - val_loss: 4522.3345
Epoch 21/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - loss: 3683.0820 - val_loss: 5051.2363
Epoch 22/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 17ms/step - loss: 3497.8530 - val_loss: 5381.6528
Epoch 23/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - loss: 3558.4512 - val_loss: 5386.2437
Epoch 24/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step - loss: 3438.4111 - val_loss: 5104.2617
Epoch 25/25
3/3 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step - loss: 3664.5737 - val_loss: 4751.6230
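In [ ]:
# (Alternative sketch, not part of the original run) A chronological split keeps
# every test window after the training windows, so no future values leak into
# training. Retraining on this split would give different numbers from the
# shuffled split used above; the 80/20 ratio matches the original test_size.
split_idx = int(len(X) * 0.8)
X_train_chrono, X_test_chrono = X[:split_idx], X[split_idx:]
y_train_chrono, y_test_chrono = y[:split_idx], y[split_idx:]
print(X_train_chrono.shape, X_test_chrono.shape)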
In [ ]:
# Evaluate the model
test_loss = model.evaluate(X_test, y_test)
print(f"Test loss: {test_loss:.4f}")
# Plot training history
plt.figure(figsize=(6,3))
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.show()
# Predict and plot results
predictions = model.predict(X_test)
plt.figure(figsize=(7,3))
plt.plot(y_test, label='True Values')
plt.plot(predictions, label='Predictions')
plt.xlabel('Sample Index')
plt.ylabel('Number of Passengers')
plt.legend()
plt.show()
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 34ms/step - loss: 2351.5320
Test loss: 2351.5320
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 142ms/step
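In [ ]:
# (Optional sketch, not in the original notebook) RMSE reports the error in
# passengers rather than squared passengers, which is easier to interpret; it
# reuses the mean_squared_error import and the predictions computed above.
rmse = np.sqrt(mean_squared_error(y_test, predictions.flatten()))
print(f"Test RMSE: {rmse:.2f} passengers")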
In [ ]:
# Calculate R-squared and correlation coefficient
r2 = r2_score(y_test, predictions)
correlation_matrix = np.corrcoef(y_test, predictions.flatten())
correlation_coefficient = correlation_matrix[0, 1]
print(f"R-squared: {r2:.4f}")
print(f"Correlation Coefficient: {correlation_coefficient:.4f}")
# Create scatter plot
plt.figure(figsize=(4,4))
plt.scatter(y_test, predictions, alpha=0.6)
plt.xlabel('True Values')
plt.ylabel('Predicted Values')
plt.title('Scatter Plot of True vs. Predicted Values')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--')
plt.grid()
plt.show()
R-squared: 0.8119
Correlation Coefficient: 0.9048
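In [ ]:
# (Illustrative sketch, not part of the original notebook) The trained model can be
# rolled forward recursively: each prediction is appended to the input window and
# fed back in to forecast the next month. Errors compound at each step, so later
# values should be read with caution; the 12-month horizon is an arbitrary choice.
window = time_series_data[-time_steps:].astype(float).reshape(1, time_steps, 1)
forecast = []
for _ in range(12):
    next_val = model.predict(window, verbose=0)[0, 0]
    forecast.append(next_val)
    window = np.append(window[:, 1:, :], [[[next_val]]], axis=1)
print(np.round(forecast, 1))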