def _forecast_error_metrics(
    actual_df: pd.DataFrame,
    forecast_df: pd.DataFrame,
    target_col: str = "num_rooms_booked",
) -> tuple[float, float]:
    """Return ``(mae, mape_percent)`` for a forecast against the actuals.

    Rows are compared positionally, so both frames must already be in the
    same order (the caller sorts them by timestamp first).
    """
    actual = actual_df[target_col].values
    predicted = forecast_df[target_col].values
    mae = mean_absolute_error(actual, predicted)
    # Scaled by 100 so the value can be printed directly with a "%" suffix.
    mape_percent = mean_absolute_percentage_error(actual, predicted) * 100
    return mae, mape_percent


def create_plot_5a_comparison_timeseries(
    df: pd.DataFrame,
    forecasts: dict[str, pd.DataFrame],
    cutoff_date="2024-08-31",
    cutoff_start="2024-06-01",
) -> list[tuple[str, float, float]]:
    """Plot 5a: Synthefy forecast vs. univariate forecasts - Step 5a.

    Draws the Prophet, seasonal ARIMA and multivariate forecasts on top of
    the actual ``num_rooms_booked`` series, marks the train/test split,
    saves the figure to ``hotel_demand/plot_5a_comparison_timeseries.png``
    and shows it.

    Args:
        df: Input data handed to ``prepare_forecasting_data`` together with
            ``cutoff_date``; that helper returns the history frame and the
            target-period frame used here.
        forecasts: Forecast frames keyed by ``"prophet"``, ``"arima"`` and
            ``"multivariate"``. Each needs ``timestamp`` and
            ``num_rooms_booked`` columns and one row per target-period row.
        cutoff_date: Position of the train/test split line; also forwarded
            to ``prepare_forecasting_data``.
        cutoff_start: History rows with ``timestamp`` earlier than this are
            dropped from the plot.

    Returns:
        ``[(label, mae, mape_percent), ...]`` in the order Prophet, SARIMA,
        Synthefy.

    Raises:
        KeyError: If one of the three expected keys is missing from
            ``forecasts``.
    """
    print(
        "\n=== PLOT 5a: Aggregate Performance: Univariate vs Multivariate ==="
    )

    # Prepare data
    history_df, target_period_df = prepare_forecasting_data(df, cutoff_date)
    history_df = history_df[history_df["timestamp"] >= cutoff_start]

    # Sort BEFORE scoring: the metrics compare rows by position, so actuals
    # and forecasts must share the same timestamp order. A stable sort keeps
    # the relative order of any rows that share a timestamp.
    history_df = history_df.sort_values("timestamp", kind="mergesort")
    target_period_df = target_period_df.sort_values(
        "timestamp", kind="mergesort"
    )

    # (forecasts key, legend name, colors key, returned label, zorder)
    model_specs = [
        ("prophet", "Prophet", "prophet", "Prophet", 3),
        ("arima", "Seasonal ARIMA", "arima", "SARIMA", 2),
        ("multivariate", "Synthefy (Multivariate)", "synthefy", "Synthefy", 1),
    ]

    sorted_forecasts = {}
    metrics = []
    for key, _legend_name, _color_key, short_label, _zorder in model_specs:
        frame = forecasts[key].sort_values("timestamp", kind="mergesort")
        sorted_forecasts[key] = frame
        mae, mape = _forecast_error_metrics(target_period_df, frame)
        metrics.append((short_label, mae, mape))

    # Create plot
    _, ax = plt.subplots(figsize=(14, 6))

    # Forecast lines, drawn above the ground truth (higher zorder).
    for spec, (_, _, mape) in zip(model_specs, metrics):
        key, legend_name, color_key, _short_label, zorder = spec
        frame = sorted_forecasts[key]
        ax.plot(
            frame["timestamp"],
            frame["num_rooms_booked"],
            color=colors[color_key],
            linewidth=1.5,
            label=f"{legend_name} (MAPE: {mape:.2f}%)",
            linestyle="-",
            alpha=0.7,
            zorder=zorder,
        )

    # Ground truth over the whole displayed period (lowest z-order)
    full_data = pd.concat([history_df, target_period_df]).sort_values(
        "timestamp"
    )
    ax.plot(
        full_data["timestamp"],
        full_data["num_rooms_booked"],
        color=colors["groundtruth"],
        linewidth=2,
        label="Actual Demand",
        zorder=0,
    )

    # Vertical dashed line at the cutoff date
    cutoff_ts = pd.Timestamp(cutoff_date)
    ax.axvline(
        x=cutoff_ts,  # type: ignore
        color="red",
        linestyle="--",
        linewidth=2,
        alpha=0.7,
        label="Train/Test Split",
        zorder=1,
    )

    # Shade the forecast horizon (span of the multivariate forecast)
    forecast_dates = sorted_forecasts["multivariate"]["timestamp"]
    if len(forecast_dates) > 0:
        ax.axvspan(
            forecast_dates.min(),
            forecast_dates.max(),
            color="gray",
            alpha=0.1,
            zorder=0,
        )

    # Labels, title, grid and legend
    ax.set_xlabel("Time (timestamp)")
    ax.set_ylabel("Rooms booked (rooms/day)")
    ax.set_title("Aggregate Performance: Univariate vs Multivariate")
    ax.grid(True, linestyle="--", alpha=0.3)
    ax.legend(loc="upper left", frameon=True)

    # Format x-axis
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()

    plot_path = "hotel_demand/plot_5a_comparison_timeseries.png"
    plt.savefig(plot_path, dpi=300, bbox_inches="tight")
    print(f"Plot 5a saved to: {plot_path}")
    plt.show()

    # (label, mae, mape) tuples, consumed by the metrics bar chart
    return metrics
def _draw_metric_bars(
    ax, positions, values, labels, bar_colors, value_format, ylabel, title
):
    """Draw one bar panel with a value label above every bar."""
    bars = ax.bar(
        positions,
        values,
        alpha=0.8,
        color=bar_colors,
        edgecolor="black",
        linewidth=1,
    )
    peak = max(values)
    for i, bar in enumerate(bars):
        ax.text(
            bar.get_x() + bar.get_width() / 2,
            bar.get_height() + peak * 0.02,
            value_format.format(values[i]),
            ha="center",
            va="bottom",
            # Wrap around so more models than colors does not raise.
            color=bar_colors[i % len(bar_colors)],
        )
    ax.set_xlabel("Models")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xticks(positions)
    ax.set_xticklabels(labels, rotation=45, ha="right")
    ax.grid(False)
    # Head-room above the tallest bar so its value label is not clipped.
    ax.set_ylim(0, peak * 1.15)


def _percent_reduction(baseline, candidate):
    """Return how far ``candidate`` is below ``baseline``, in % of baseline."""
    if baseline == 0:
        # A relative change against a zero baseline is undefined.
        return float("nan")
    return (baseline - candidate) / baseline * 100


def create_plot_5b_performance_metrics(
    forecast_metrics: list,
) -> tuple[float, float]:
    """Plot 5b: Performance metrics comparison - Step 5b

    Draws MAE and MAPE bar charts side by side, saves the figure to
    ``hotel_demand/plot_5b_performance_metrics.png`` and shows it.

    Args:
        forecast_metrics: List of tuples (label, mae, mape). The last entry
            is treated as the Synthefy model and the first two as the
            baselines, so at least two entries are required.

    Returns:
        ``(improvement_vs_first, improvement_vs_second)``: the relative MAPE
        reduction (in percent) of the last entry against the first and the
        second entry. A component is NaN when that baseline MAPE is zero.
    """
    print("\n=== PLOT 5b: Aggregate Performance: Baseline vs Multivariate ===")

    # Extract data
    models = [label for label, _, _ in forecast_metrics]
    mae_values = [mae for _, mae, _ in forecast_metrics]
    mape_values = [mape for _, _, mape in forecast_metrics]

    # Two panels side by side: MAE on the left, MAPE on the right
    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 7))
    x = np.arange(len(models))

    # One color per model, in the order Prophet, ARIMA, Synthefy
    bar_colors = [colors["prophet"], colors["arima"], colors["synthefy"]]

    _draw_metric_bars(
        ax1,
        x,
        mae_values,
        models,
        bar_colors,
        "{:.2f}",
        "MAE (Mean Absolute Error)",
        "MAE Comparison",
    )
    _draw_metric_bars(
        ax2,
        x,
        mape_values,
        models,
        bar_colors,
        "{:.1f}%",
        "MAPE (Mean Absolute Percentage Error %)",
        "MAPE Comparison",
    )

    plt.tight_layout()
    plot_path = "hotel_demand/plot_5b_performance_metrics.png"
    plt.savefig(plot_path, dpi=300, bbox_inches="tight")
    print(f"Plot 5b saved to: {plot_path}")
    plt.show()

    # Relative MAPE reduction of Synthefy against each baseline. Both
    # components must be MAPE-based: subtracting a MAPE from an MAE mixes
    # units and produces a meaningless number.
    synthefy_mape = mape_values[-1]
    improvements = (
        _percent_reduction(mape_values[0], synthefy_mape),
        _percent_reduction(mape_values[1], synthefy_mape),
    )
    print(
        f"Conclusion: Synthefy shows {improvements[0]:.1f}% improvement over baseline methods."
    )
    return improvements
# Create plots
print("\n" + "=" * 60)
print("CREATING PLOTS")
print("=" * 60)

# Plot 5a: Time series comparison. Returns (label, mae, mape) tuples in the
# order Prophet, SARIMA, Synthefy.
forecast_metrics = create_plot_5a_comparison_timeseries(
    data_df,
    {
        "prophet": prophet_df,
        "arima": sarima_df,
        "multivariate": forecast_with_events,
    },
    cutoff_date="2024-08-31",
)

# Plot 5b: Performance metrics comparison
improvements = create_plot_5b_performance_metrics(forecast_metrics)

# Summary
print("\n" + "=" * 60)
print("SUMMARY")
print("=" * 60)
for label, mae, mape in forecast_metrics:
    print(f"{label} MAE: {mae:.2f}, MAPE: {mape:.2f}%")

# improvements[0] is measured against the first entry of forecast_metrics
# (Prophet) and improvements[1] against the second (SARIMA), so each label
# below must use the matching index.
# NOTE(review): the literal "-" presumably marks an error reduction; a
# negative improvement would print as "--x%". Confirm the intended format.
print(f"Improvement (Synthefy vs baseline SARIMA): -{improvements[1]:.1f}%")
print(f"Improvement (Synthefy vs baseline Prophet): -{improvements[0]:.1f}%")