# Data manipulation and analysis imports
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Plotting imports
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.graph_objs import Figure
import plotly.io as pio
pio.renderers.default = 'notebook' # display in notebook

# Statistical testing imports
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Load the cleaned transaction data produced in task one
data = pd.read_csv('../data/clean_qvi_data.csv')

# Parse the transaction date and derive a month-start timestamp (year_month)
# so transactions can be grouped and plotted by calendar month
data['date'] = pd.to_datetime(data['date'])
data['year_month'] = data['date'].dt.to_period('M').dt.to_timestamp()

# One row per store per month: total sales, distinct loyalty cards, transaction count
monthly_metrics = (
    data.groupby(['store_nbr', 'year_month'])
    .agg(
        monthly_sales=('tot_sales', 'sum'),
        monthly_num_customers=('lylty_card_nbr', 'nunique'),
        monthly_num_transactions=('num_transactions', 'count'),
    )
    .reset_index()
)

# Derived metric: transactions per distinct customer within the month
monthly_metrics['avg_monthly_txn_per_customer'] = (
    monthly_metrics['monthly_num_transactions'] / monthly_metrics['monthly_num_customers']
)

# Label every store-month as pre-trial, trial or post-trial.
# Months before 2019-02 are pre-trial, 2019-02 up to (not including) 2019-05 are trial,
# and everything else keeps the 'post-trial' default.
before_trial = monthly_metrics['year_month'] < '2019-02'
during_trial = (
    (monthly_metrics['year_month'] >= '2019-02')
    & (monthly_metrics['year_month'] < '2019-05')
)

monthly_metrics['period'] = 'post-trial'
monthly_metrics.loc[before_trial, 'period'] = 'pre-trial'
monthly_metrics.loc[during_trial, 'period'] = 'trial'

# Separate views of the pre-trial and trial months for later comparison
pre_data = monthly_metrics[monthly_metrics['period'] == 'pre-trial']
trial_data = monthly_metrics[monthly_metrics['period'] == 'trial']

# Stores that participated in the trial
test_stores = [77, 86, 88]

# Metric columns compared between trial and control stores
metrics = ['monthly_sales', 'monthly_num_customers', 'avg_monthly_txn_per_customer']

# Getting control candidates for stores participating in the trial.

# Reshape each pre-trial metric into a month-by-store table
# (rows: year_month, columns: store_nbr) so stores can be correlated column-wise
pivot_axes = {'index': 'year_month', 'columns': 'store_nbr'}

sales_pivot = pre_data.pivot(values='monthly_sales', **pivot_axes)
customers_pivot = pre_data.pivot(values='monthly_num_customers', **pivot_axes)
txn_pivot = pre_data.pivot(values='avg_monthly_txn_per_customer', **pivot_axes)

# Store-to-store correlation matrix for each metric, rounded to two decimals
sales_corr = sales_pivot.corr().round(2)
customers_corr = customers_pivot.corr().round(2)
txn_corr = txn_pivot.corr().round(2)

# For every trial store, average the three correlations per candidate store and
# keep the candidate with the highest average as its control store
similarity_rows = []

for store in test_stores:
    # Correlations with this trial store; the trial stores themselves are not candidates
    sales_vs_others = sales_corr[store].drop(test_stores)
    customers_vs_others = customers_corr[store].drop(test_stores)
    txn_vs_others = txn_corr[store].drop(test_stores)

    mean_corr = ((sales_vs_others + customers_vs_others + txn_vs_others) / 3).round(2)

    # Candidates missing any of the three correlations become NaN and are dropped
    ranked = mean_corr.dropna().sort_values(ascending=False)

    similarity_rows.append({
        'test_store': store,
        'control_store': ranked.index[0],
        'combined_similarity': ranked.iloc[0],
    })

similarity_df = pd.DataFrame(similarity_rows)
similarity_df

# Control stores in the same order as test_stores
control_stores = similarity_df['control_store'].tolist()

# Lookup from each trial store to its selected control store
store_pairs = {trial: ctrl for trial, ctrl in zip(test_stores, control_stores)}

# Function to prepare data for plotting
def prepare_plot(monthly_metrics, store, control):
    '''
    Description: Selects the rows of monthly_metrics that belong to the given test store or control store and fall in the pre-trial or trial period, ready to be passed to a plotting function.

    Parameters:
                monthly_metrics: pandas dataframe. Monthly metrics for all stores; must contain the 'period' and 'store_nbr' columns.
                store: int. The store number of the test store.
                control: int. The store number of the control store.
    Returns:
                pandas dataframe. The rows of monthly_metrics for the two stores, restricted to the 'pre-trial' and 'trial' periods.
    '''
    in_window = monthly_metrics['period'].isin(['pre-trial', 'trial'])
    in_pair = monthly_metrics['store_nbr'].isin([store, control])
    return monthly_metrics[in_window & in_pair]

# Function to create timeline plots for the specified store and control store
def timeline_plot(plot_data, store, control):
    '''
    Description: Builds and displays a figure with three stacked line charts for the test store and its control store: monthly sales, monthly number of customers, and average number of transactions per customer. Each chart gets a dashed vertical line at 2019-02 and a 'Trial Start' annotation.

    Parameters: plot_data: pandas dataframe. Data for the test store and control store; must contain 'year_month', 'store_nbr' and the three metric columns.
                store: int. The store number of the test store (used in titles).
                control: int. The store number of the control store (used in titles).

    Returns: None. The figure is rendered with fig.show() and is not returned.
    '''
    pair_label = f'Store {store} and Control Store {control}'

    # (metric column, axis/hover label) for each subplot row, top to bottom
    panels = (
        ('monthly_sales', 'Monthly Sales'),
        ('monthly_num_customers', 'Monthly Number of Customers'),
        ('avg_monthly_txn_per_customer', 'Average Number of Transactions per Customer'),
    )
    panel_titles = tuple(f'{label} for {pair_label}' for _, label in panels)

    fig = make_subplots(rows=3, cols=1, subplot_titles=panel_titles)

    # Build one plotly express line chart per metric and move its traces into the matching row
    for row, (column, label) in enumerate(panels, start=1):
        panel = px.line(
            data_frame=plot_data,
            x='year_month',
            y=column,
            color='store_nbr',
            title=panel_titles[row - 1],
            labels={column: label},
        )
        for trace in panel.data:
            # Only the first row contributes legend entries, so each store is listed once
            if row > 1:
                trace.showlegend = False
            fig.add_trace(trace, row=row, col=1)

    # Mark the trial start on every row (added after the traces are in place)
    for row in (1, 2, 3):
        fig.add_vline(
            x='2019-02',
            line_width=2,
            line_dash='dash',
            line_color='black',
            row=row,
            col=1,
        )
        # Position is given as a fraction of the subplot's own x/y extent
        fig.add_annotation(
            x=0.78,
            y=0.75,
            xref='x domain',
            yref='y domain',
            text='Trial Start',
            showarrow=False,
            font=dict(color='black'),
            row=row,
            col=1,
        )

    fig.update_layout(
        title=f'Monthly Metrics {pair_label}',
        height=1000,
        width=800,
        legend_title_text='Store Number',
    )

    fig.show()

# Preparing the data for plotting for each trial store and its corresponding control store.
# The control store is looked up in store_pairs (built from the similarity ranking) rather
# than typed in by hand, so the plots cannot drift from the control-store selection.
plot_77_data = prepare_plot(monthly_metrics, 77, store_pairs[77])
plot_86_data = prepare_plot(monthly_metrics, 86, store_pairs[86])
plot_88_data = prepare_plot(monthly_metrics, 88, store_pairs[88])

timeline_plot(plot_77_data, 77, store_pairs[77])

timeline_plot(plot_86_data, 86, store_pairs[86])

timeline_plot(plot_88_data, 88, store_pairs[88])

def prepare_did(test_store, control_store, monthly_metrics):
    '''
    Description: Prepares data for difference-in-differences analysis. Keeps only the rows for the test store and control store in the pre-trial and trial periods, then adds the indicator columns the regression needs: 'exp_group', 'period' and 'interaction'.

                 Post-trial months are excluded. Without that filter they would be coded as period = 0 and would be mixed into the pre-trial baseline that the trial months are compared against.

    Parameters: test_store: int. The store number of the test store.
                control_store: int. The store number of the control store.
                monthly_metrics: pandas dataframe. Monthly metrics for all stores; must contain 'store_nbr' and a 'period' column holding 'pre-trial' / 'trial' / 'post-trial' labels. It is not modified.

    Returns: pandas dataframe. A copy of the selected rows where
             'exp_group' is 1 for the test store and 0 for the control store,
             'period' is overwritten with 1 for trial months and 0 for pre-trial months,
             'interaction' is exp_group * period (1 only for the test store during the trial).
    '''
    in_pair = monthly_metrics['store_nbr'].isin([test_store, control_store])
    # Restrict to the comparison window: baseline (pre-trial) versus treatment (trial)
    in_window = monthly_metrics['period'].isin(['pre-trial', 'trial'])

    did_data = monthly_metrics[in_pair & in_window].copy()
    did_data['exp_group'] = (did_data['store_nbr'] == test_store).astype(int)
    did_data['period'] = (did_data['period'] == 'trial').astype(int)
    did_data['interaction'] = did_data['exp_group'] * did_data['period']

    return did_data

# Applying the prepare_did function to each trial store and its control store.
# Control stores are read from store_pairs so they always match the similarity-based selection.
did_77 = prepare_did(77, store_pairs[77], monthly_metrics)
did_86 = prepare_did(86, store_pairs[86], monthly_metrics)
did_88 = prepare_did(88, store_pairs[88], monthly_metrics)

# Module-level accumulator: run_did appends one
# (test_store, control_store, metric, interaction coefficient) tuple per fitted model
did_coefs = []


def run_did(prepped_did_df, metrics, test_store, control_store):
    '''
    Description: Runs a difference-in-differences regression for each metric by fitting an OLS model of the form '<metric> ~ exp_group + period + interaction' on the prepared DataFrame.

    Parameters: prepped_did_df: pandas DataFrame. Data prepared for difference-in-differences analysis; must contain 'exp_group', 'period', 'interaction' and every column named in metrics.
                metrics: list of strings. The metric column names to model, one regression per metric.
                test_store: int. The store number of the test store (recorded in did_coefs only).
                control_store: int. The store number of the control store (recorded in did_coefs only).

    Returns: dictionary. Maps each metric name to the summary of its fitted OLS model.
             Side effect: for every metric, a tuple (test_store, control_store, metric, interaction coefficient rounded to 4 decimals) is appended to the module-level did_coefs list. Calling the function again for the same pair appends the tuples again.
    '''
    summaries = {}
    for metric in metrics:
        formula = f'{metric} ~ exp_group + period + interaction'
        fitted = smf.ols(formula, data=prepped_did_df).fit()
        summaries[metric] = fitted.summary()

        # The interaction coefficient is the DiD estimate of the trial effect
        interaction_coef = fitted.params['interaction']
        did_coefs.append((test_store, control_store, metric, interaction_coef.round(4)))

    return summaries

# Running the DiD test for each pair.
# The control store is read from store_pairs so it always matches the similarity-based selection.
did_results_77 = run_did(did_77, metrics, 77, store_pairs[77])
did_results_77

{'monthly_sales': <class 'statsmodels.iolib.summary.Summary'>
 """
                             OLS Regression Results                            
 ==============================================================================
 Dep. Variable:          monthly_sales   R-squared:                       0.975
 Model:                            OLS   Adj. R-squared:                  0.971
 Method:                 Least Squares   F-statistic:                     259.3
 Date:                Fri, 27 Feb 2026   Prob (F-statistic):           3.58e-16
 Time:                        17:55:29   Log-Likelihood:                -130.43
 No. Observations:                  24   AIC:                             268.9
 Df Residuals:                      20   BIC:                             273.6
 Df Model:                           3                                         
 Covariance Type:            nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept     941.9333     20.247     46.523      0.000     899.699     984.167
 exp_group    -690.4889     28.633    -24.115      0.000    -750.217    -630.761
 period         11.7667     40.494      0.291      0.774     -72.701      96.235
 interaction    -4.2111     57.267     -0.074      0.942    -123.667     115.245
 ==============================================================================
 Omnibus:                        1.425   Durbin-Watson:                   1.185
 Prob(Omnibus):                  0.490   Jarque-Bera (JB):                0.420
 Skew:                          -0.243   Prob(JB):                        0.811
 Kurtosis:                       3.427   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """,
 'monthly_num_customers': <class 'statsmodels.iolib.summary.Summary'>
 """
                               OLS Regression Results                             
 =================================================================================
 Dep. Variable:     monthly_num_customers   R-squared:                       0.933
 Model:                               OLS   Adj. R-squared:                  0.923
 Method:                    Least Squares   F-statistic:                     92.41
 Date:                   Fri, 27 Feb 2026   Prob (F-statistic):           6.82e-12
 Time:                           17:55:29   Log-Likelihood:                -79.702
 No. Observations:                     24   AIC:                             167.4
 Df Residuals:                         20   BIC:                             172.1
 Df Model:                              3                                         
 Covariance Type:               nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept      94.8889      2.446     38.790      0.000      89.786      99.992
 exp_group     -51.0000      3.459    -14.742      0.000     -58.216     -43.784
 period         -1.2222      4.892     -0.250      0.805     -11.428       8.983
 interaction     4.6667      6.919      0.674      0.508      -9.766      19.099
 ==============================================================================
 Omnibus:                        0.331   Durbin-Watson:                   1.324
 Prob(Omnibus):                  0.847   Jarque-Bera (JB):                0.308
 Skew:                          -0.233   Prob(JB):                        0.857
 Kurtosis:                       2.699   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """,
 'avg_monthly_txn_per_customer': <class 'statsmodels.iolib.summary.Summary'>
 """
                                  OLS Regression Results                                 
 ========================================================================================
 Dep. Variable:     avg_monthly_txn_per_customer   R-squared:                       0.795
 Model:                                      OLS   Adj. R-squared:                  0.764
 Method:                           Least Squares   F-statistic:                     25.81
 Date:                          Fri, 27 Feb 2026   Prob (F-statistic):           4.43e-07
 Time:                                  17:55:29   Log-Likelihood:                 49.202
 No. Observations:                            24   AIC:                            -90.40
 Df Residuals:                                20   BIC:                            -85.69
 Df Model:                                     3                                         
 Covariance Type:                      nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept       1.1623      0.011    102.194      0.000       1.139       1.186
 exp_group      -0.1101      0.016     -6.846      0.000      -0.144      -0.077
 period          0.0313      0.023      1.374      0.185      -0.016       0.079
 interaction    -0.0430      0.032     -1.337      0.196      -0.110       0.024
 ==============================================================================
 Omnibus:                        1.657   Durbin-Watson:                   2.299
 Prob(Omnibus):                  0.437   Jarque-Bera (JB):                1.410
 Skew:                           0.453   Prob(JB):                        0.494
 Kurtosis:                       2.232   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """}

# Control store read from store_pairs so it always matches the similarity-based selection
did_results_86 = run_did(did_86, metrics, 86, store_pairs[86])
did_results_86

{'monthly_sales': <class 'statsmodels.iolib.summary.Summary'>
 """
                             OLS Regression Results                            
 ==============================================================================
 Dep. Variable:          monthly_sales   R-squared:                       0.112
 Model:                            OLS   Adj. R-squared:                 -0.021
 Method:                 Least Squares   F-statistic:                    0.8394
 Date:                Fri, 27 Feb 2026   Prob (F-statistic):              0.488
 Time:                        17:55:29   Log-Likelihood:                -141.71
 No. Observations:                  24   AIC:                             291.4
 Df Residuals:                      20   BIC:                             296.1
 Df Model:                           3                                         
 Covariance Type:            nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept     922.3778     32.395     28.473      0.000     854.804     989.952
 exp_group     -50.4722     45.813     -1.102      0.284    -146.036      45.092
 period        -81.2444     64.789     -1.254      0.224    -216.393      53.904
 interaction   138.7389     91.626      1.514      0.146     -52.389     329.867
 ==============================================================================
 Omnibus:                        1.516   Durbin-Watson:                   1.859
 Prob(Omnibus):                  0.469   Jarque-Bera (JB):                0.392
 Skew:                           0.033   Prob(JB):                        0.822
 Kurtosis:                       3.623   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """,
 'monthly_num_customers': <class 'statsmodels.iolib.summary.Summary'>
 """
                               OLS Regression Results                             
 =================================================================================
 Dep. Variable:     monthly_num_customers   R-squared:                       0.132
 Model:                               OLS   Adj. R-squared:                  0.001
 Method:                    Least Squares   F-statistic:                     1.010
 Date:                   Fri, 27 Feb 2026   Prob (F-statistic):              0.409
 Time:                           17:55:29   Log-Likelihood:                -83.476
 No. Observations:                     24   AIC:                             175.0
 Df Residuals:                         20   BIC:                             179.7
 Df Model:                              3                                         
 Covariance Type:               nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept     104.2222      2.863     36.407      0.000      98.251     110.194
 exp_group      -4.3333      4.048     -1.070      0.297     -12.778       4.112
 period         -3.5556      5.725     -0.621      0.542     -15.499       8.387
 interaction    12.6667      8.097      1.564      0.133      -4.223      29.557
 ==============================================================================
 Omnibus:                        0.730   Durbin-Watson:                   1.826
 Prob(Omnibus):                  0.694   Jarque-Bera (JB):                0.061
 Skew:                           0.000   Prob(JB):                        0.970
 Kurtosis:                       3.248   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """,
 'avg_monthly_txn_per_customer': <class 'statsmodels.iolib.summary.Summary'>
 """
                                  OLS Regression Results                                 
 ========================================================================================
 Dep. Variable:     avg_monthly_txn_per_customer   R-squared:                       0.070
 Model:                                      OLS   Adj. R-squared:                 -0.070
 Method:                           Least Squares   F-statistic:                    0.4990
 Date:                          Fri, 27 Feb 2026   Prob (F-statistic):              0.687
 Time:                                  17:55:29   Log-Likelihood:                 38.605
 No. Observations:                            24   AIC:                            -69.21
 Df Residuals:                                20   BIC:                            -64.50
 Df Model:                                     3                                         
 Covariance Type:                      nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept       1.2808      0.018     72.413      0.000       1.244       1.318
 exp_group      -0.0236      0.025     -0.942      0.358      -0.076       0.029
 period         -0.0295      0.035     -0.833      0.415      -0.103       0.044
 interaction     0.0200      0.050      0.401      0.693      -0.084       0.124
 ==============================================================================
 Omnibus:                        1.493   Durbin-Watson:                   2.250
 Prob(Omnibus):                  0.474   Jarque-Bera (JB):                0.747
 Skew:                           0.429   Prob(JB):                        0.688
 Kurtosis:                       3.101   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """}

# Control store read from store_pairs so it always matches the similarity-based selection
did_results_88 = run_did(did_88, metrics, 88, store_pairs[88])
did_results_88

{'monthly_sales': <class 'statsmodels.iolib.summary.Summary'>
 """
                             OLS Regression Results                            
 ==============================================================================
 Dep. Variable:          monthly_sales   R-squared:                       0.933
 Model:                            OLS   Adj. R-squared:                  0.923
 Method:                 Least Squares   F-statistic:                     93.28
 Date:                Fri, 27 Feb 2026   Prob (F-statistic):           6.25e-12
 Time:                        17:55:29   Log-Likelihood:                -131.39
 No. Observations:                  24   AIC:                             270.8
 Df Residuals:                      20   BIC:                             275.5
 Df Model:                           3                                         
 Covariance Type:            nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept     906.1222     21.078     42.989      0.000     862.154     950.090
 exp_group     432.3722     29.809     14.505      0.000     370.192     494.552
 period        143.0111     42.156      3.392      0.003      55.076     230.947
 interaction   -52.5722     59.617     -0.882      0.388    -176.932      71.787
 ==============================================================================
 Omnibus:                       11.190   Durbin-Watson:                   1.354
 Prob(Omnibus):                  0.004   Jarque-Bera (JB):                9.615
 Skew:                          -1.214   Prob(JB):                      0.00817
 Kurtosis:                       4.928   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """,
 'monthly_num_customers': <class 'statsmodels.iolib.summary.Summary'>
 """
                               OLS Regression Results                             
 =================================================================================
 Dep. Variable:     monthly_num_customers   R-squared:                       0.843
 Model:                               OLS   Adj. R-squared:                  0.820
 Method:                    Least Squares   F-statistic:                     35.87
 Date:                   Fri, 27 Feb 2026   Prob (F-statistic):           3.07e-08
 Time:                           17:55:29   Log-Likelihood:                -71.990
 No. Observations:                     24   AIC:                             152.0
 Df Residuals:                         20   BIC:                             156.7
 Df Model:                              3                                         
 Covariance Type:               nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept     102.1111      1.774     57.561      0.000      98.411     105.812
 exp_group      23.3333      2.509      9.301      0.000      18.100      28.566
 period         10.5556      3.548      2.975      0.007       3.155      17.956
 interaction    -7.3333      5.018     -1.462      0.159     -17.800       3.133
 ==============================================================================
 Omnibus:                        0.802   Durbin-Watson:                   1.668
 Prob(Omnibus):                  0.670   Jarque-Bera (JB):                0.823
 Skew:                          -0.291   Prob(JB):                        0.663
 Kurtosis:                       2.304   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """,
 'avg_monthly_txn_per_customer': <class 'statsmodels.iolib.summary.Summary'>
 """
                                  OLS Regression Results                                 
 ========================================================================================
 Dep. Variable:     avg_monthly_txn_per_customer   R-squared:                       0.244
 Model:                                      OLS   Adj. R-squared:                  0.131
 Method:                           Least Squares   F-statistic:                     2.154
 Date:                          Fri, 27 Feb 2026   Prob (F-statistic):              0.125
 Time:                                  17:55:29   Log-Likelihood:                 38.030
 No. Observations:                            24   AIC:                            -68.06
 Df Residuals:                                20   BIC:                            -63.35
 Df Model:                                     3                                         
 Covariance Type:                      nonrobust                                         
 ===============================================================================
                   coef    std err          t      P>|t|      [0.025      0.975]
 -------------------------------------------------------------------------------
 Intercept       1.2837      0.018     70.862      0.000       1.246       1.321
 exp_group      -0.0545      0.026     -2.127      0.046      -0.108      -0.001
 period          0.0197      0.036      0.544      0.592      -0.056       0.095
 interaction     0.0098      0.051      0.191      0.850      -0.097       0.117
 ==============================================================================
 Omnibus:                        4.047   Durbin-Watson:                   1.857
 Prob(Omnibus):                  0.132   Jarque-Bera (JB):                2.229
 Skew:                           0.557   Prob(JB):                        0.328
 Kurtosis:                       3.994   Cond. No.                         6.50
 ==============================================================================
 
 Notes:
 [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
 """}

# Tabulate the collected DiD coefficients: one row per (test store, control store, metric).
# Store numbers are cast to categories so they are treated as discrete groups, not numbers.
did_plot_data = pd.DataFrame(
    did_coefs,
    columns=['test_store', 'control_store', 'metric', 'did_coef'],
).astype({'test_store': 'category', 'control_store': 'category'})

# Human-readable names for axes, legend and hover text
axis_labels = {
    'metric': 'Metric',
    'did_coef': 'Estimated Change (DiD Coefficient)',
    'test_store': 'Test Store',
    'control_store': 'Control Store',
}

# Display text for each metric tick on the x axis
metric_tick_labels = {
    'monthly_sales': 'Monthly Sales',
    'monthly_num_customers': 'Monthly Number of Customers',
    'avg_monthly_txn_per_customer': 'Average Monthly Transactions/Customer',
}

# Grouped bars: one group per metric, one bar per test store
bar = px.bar(
    data_frame=did_plot_data,
    x='metric',
    y='did_coef',
    labels=axis_labels,
    barmode='group',
    color='test_store',
    title='DiD Coefficients by Metric for Test Stores vs Control Stores',
    text='did_coef',
    text_auto=True,
    hover_data={'control_store': True},
)

# This title replaces the one passed to px.bar above
bar.update_layout(title='Estimated Change (DiD Coefficients) by Metric for Test Stores vs Control Stores')

bar.update_xaxes(
    tickvals=list(metric_tick_labels),
    ticktext=list(metric_tick_labels.values()),
)

bar.show()

Quantium Chip Uplift Analysis

Quantium Retail Analytics Team: Uplift Analysis

Analysis by: Breanna Williams

02-26-2026

Executive Summary

Objective:

Key Findings:

Recommendations:

Limitations:

Full Analysis

Import Libraries & Packages

Import and Prepare Data

Select Control Stores

Visualize Trial vs. Control Stores

Insights:

Uplift Analysis - Difference-in-Differences (DID) Regression

Insights:

Conclusion

	test_store	control_store	combined_similarity
0	77	119	0.85
1	86	138	0.71
2	88	178	0.75