import datetime
import numpy as np
import pandas as pd

from pandas_profiling import ProfileReport

import plotly.express as px
import plotly.graph_objects as go

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures


import plotly.io as pio

# for rendering plot static
# pio.renderers.default = "svg"


data_file_name = 'data/OGD_gest_kalwo_GEST_KALWOCHE_100.csv'
covid_deaths_data_file_name = 'data/timeline-faelle-bundeslaender.csv'

df = pd.read_csv(data_file_name, sep=';')  
df_deaths = pd.read_csv(covid_deaths_data_file_name, sep=';')

df


df_deaths


# profile = ProfileReport(df, title="Pandas Profiling Report")
# profile2 = ProfileReport(df_deaths, title="Pandas Profiling Report")


# profile2


df = df.rename(columns={"C-KALWOCHE-0": "cal_week", "C-B00-0": "state", "C-C11-0": "gender", "F-ANZ-1": "counts" })


df['year'] = [d.split("-")[1][:4] for d in df['cal_week']]
df['cal_week'] = [d.split("-")[1][4:] for d in df['cal_week']]
df['formatted_date'] = df.year.astype(str)  + df.cal_week.astype(str) + '0'
df['date'] = pd.to_datetime(df['formatted_date'], format='%Y%W%w')

# df['datetime'] = pd.to_datetime(df.year.astype(str) + '-' + 
#                                 df.cal_week.astype(str) + '-1' , format="%Y-%W-%w").dt.strftime('%Y-%W')


df['conv_date']= df.date.map(datetime.datetime.toordinal)

df


df.groupby('date').agg('sum')


grpd_date = df.groupby('date').agg('sum') 
grpd_date = grpd_date.rename(columns={'counts':'nr_deaths'})


# grpd_date['date'] = grpd_date.index
# grpd_date


temp = df_deaths.copy(deep=True)


df_deaths = temp.copy(deep=True)


df_deaths = df_deaths[df_deaths.Name == 'Österreich']


df_deaths


df_deaths = df_deaths.rename(columns={'Datum':'formatted_date', 'Todesfaelle':'deaths'})


df_deaths = df_deaths[['formatted_date', 'deaths']]
df_deaths


df_deaths['formatted_date'] = [d.split("T")[0] for d in df_deaths['formatted_date']]

df_deaths['date'] = pd.to_datetime(df_deaths['formatted_date'])

# df_deaths['formatted_date'] = pd.to_datetime(df_deaths['formatted_date'])
# df_deaths['formatted_date'] = df_deaths['formatted_date'].dt.tz_localize('CET', utc=True)
 
    
# df_deaths['date'] =  df_deaths['formatted_date'].dt.strftime('%Y-%m-%d')


df_deaths


grpd_date_df_deaths = df_deaths.groupby('date').agg('sum')


grpd_date_df_deaths['deaths_per_day'] = grpd_date_df_deaths.diff()


# https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(21)02796-3/fulltext#seccestitle150
# austria has ratio of 1.33
# grpd_date_df_deaths['deaths_per_day_corrected'] = grpd_date_df_deaths.deaths_per_day * 1.33


grpd_date_df_deaths


fig = px.line(
    x=grpd_date_df_deaths.index,
    y=grpd_date_df_deaths.deaths_per_day, 
    title='Deaths per year'
)
fig.show()


total_df = pd.merge(grpd_date, grpd_date_df_deaths, how='left', on='date')
total_df


total_df = total_df.rename(
    columns={'deaths_per_day':'covid_deaths'})

total_df = total_df[['nr_deaths', 'covid_deaths']]


total_df


total_df.describe()


upper_limit = total_df['nr_deaths'].quantile(0.99999)
lower_limit = total_df['nr_deaths'].quantile(0.00001)

new_df = total_df[(
    total_df['nr_deaths'] <= upper_limit) & (
    total_df['nr_deaths'] >= lower_limit)]


new_df


final_df = new_df


only_covid = final_df[final_df.covid_deaths.notna()]
only_covid.shape

(67, 2)


# only if we want to limit all deaths to covid time period
# final_df = only_covid


fig = px.line(
    x=final_df.index,
    y=final_df.nr_deaths, 
    title='Deaths per year'
)

fig.add_traces(
    go.Scatter(
        x=final_df.index, y=final_df.covid_deaths, 
        name='Covid deaths'))

fig.show()


trace1 = go.Scatter(
                    x=final_df.index,
                    y=final_df.nr_deaths,
                    mode='lines',
                    line=dict(width=1.5))

trace2 = go.Scatter(
        x=final_df.index, y=final_df.covid_deaths, 
        name='Covid deaths')


frames=[
    dict(
        data=[
            dict(
                type = 'scatter',
                x=final_df.index[:k],
                y=final_df.nr_deaths[:k]),
    
            dict(
                type = 'scatter',
                x=final_df.index[:k],
                y=final_df.covid_deaths[:k])]
    )
    for k in range(0, len(final_df))]

layout = go.Layout(width=1000,
                   height=600,
                   showlegend=False,
                   hovermode='x unified',
                   updatemenus=[
                        dict(
                            type='buttons', showactive=False,
                            y=1.05,
                            x=1.15,
                            xanchor='right',
                            yanchor='top',
                            pad=dict(t=0, r=10),
                            buttons=[dict(label='Build line',
                            method='animate',
                            args=[None, 
                                  dict(frame=dict(duration=2, 
                                                  redraw=False),
                                                  transition=dict(duration=0),
                                                  fromcurrent=True,
                                                  mode='immediate')]
                            )]
                        ),
                        dict(
                            type = "buttons",
                            direction = "left",
                            buttons=list([
                                dict(
                                    args=[{"yaxis.type": "linear"}],
                                    label="LINEAR",
                                    method="relayout"
                                ),
                                dict(
                                    args=[{"yaxis.type": "log"}],
                                    label="LOG",
                                    method="relayout"
                                )
                            ]),
                        ),
                    ]              
                  )
# layout.update(xaxis =dict(range=['2020-03-16', '2020-06-13'], autorange=False),
#               yaxis =dict(range=[0, 35000], autorange=False));
fig = go.Figure(data=[trace1, trace2], frames=frames, layout=layout)

# fig.show()


fig = px.scatter(
    x=final_df.index,
    y=final_df.nr_deaths, 
    trendline="ols",
    trendline_color_override="red",
    opacity=.5,
    title='Deaths per year'
)
fig.show()

results = px.get_trendline_results(fig)
results = results.iloc[0]["px_fit_results"].summary()
print(results)
# regression params not available for lowess

                            OLS Regression Results                            
==============================================================================
Dep. Variable:                      y   R-squared:                       0.130
Model:                            OLS   Adj. R-squared:                  0.130
Method:                 Least Squares   F-statistic:                     174.4
Date:                Thu, 30 Jun 2022   Prob (F-statistic):           3.34e-37
Time:                        15:41:46   Log-Likelihood:                -7709.5
No. Observations:                1165   AIC:                         1.542e+04
Df Residuals:                    1163   BIC:                         1.543e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
==============================================================================
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const       1065.5178     34.203     31.152      0.000     998.410    1132.625
x1           3.43e-07    2.6e-08     13.206      0.000    2.92e-07    3.94e-07
==============================================================================
Omnibus:                      799.913   Durbin-Watson:                   0.576
Prob(Omnibus):                  0.000   Jarque-Bera (JB):            16455.842
Skew:                           2.882   Prob(JB):                         0.00
Kurtosis:                      20.487   Cond. No.                     8.49e+09
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.49e+09. This might indicate that there are
strong multicollinearity or other numerical problems.


x=final_df.index.values

y=final_df.nr_deaths.values
x = np.arange(0, len(y))
x = x.reshape(-1, 1)

model = LinearRegression()
model.fit(x, y)


x_range_ordinal = np.linspace(x.min(), x.max(), len(y))
y_range = model.predict(x_range_ordinal.reshape(-1, 1))

len(x_range_ordinal), len(y_range)

(1165, 1165)


fig = px.scatter(
    x=final_df.index,
    y=final_df.nr_deaths, 
    opacity=.5,
#     trendline='ols', trendline_color_override='darkblue',
    title='Deaths per year'
)


fig.add_traces(
    go.Scatter(
        x=final_df.index, 
        y=y_range, 
        name='Regression Fit'))


fig.show()


fig = px.scatter(
    x=final_df.index,
    y=final_df.nr_deaths, 
    trendline="lowess", 
    trendline_color_override="red",
    trendline_options=dict(frac=0.1),
    opacity=.5,
    title='Deaths per year'
)
fig.show()

# regression params not available for lowess


poly_degree = 10

y = final_df.nr_deaths.values

x = np.arange(0, len(y))
x = x.reshape(-1, 1)

# just for checking with sklearn implementation
poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
poly_features = poly.fit_transform(x)
poly_reg_model = LinearRegression()
poly_reg_model.fit(poly_features, y)


# y_range_poly = poly_reg_model.predict(
#     poly_pred_x)
# print(poly_reg_model.intercept_)
# print(poly_reg_model.coef_)
###

#-----
fitted_params = np.polyfit(np.arange(0, len(y)), y, poly_degree )

polynomials = np.poly1d(fitted_params)

derivatives = np.polyder(polynomials)

y_value_at_point = polynomials(x).flatten()

slope_at_point = np.polyval(derivatives, np.arange(0, len(y)))
#-----



# x_range_ordinal_poly = np.linspace(x.min(), x.max(), len(y))

# poly_pred_x = poly.fit_transform(x_range_ordinal_poly.reshape(-1, 1))


print(f'''
x: {len(x), x},
y: {len(y), y},
''')

print(f'''
fittedparams: {fitted_params, fitted_params},

derivs: {derivatives},

y vals at point: {y_value_at_point, len(y_value_at_point)},

slope at point: {slope_at_point}''')

x: (1165, array([[   0],
       [   1],
       [   2],
       ...,
       [1162],
       [1163],
       [1164]])),
y: (1165, array([1867, 1902, 2027, ..., 1508, 1554, 1560])),


fittedparams: (array([ 1.12001971e-24, -7.91989854e-21,  2.36011784e-17, -3.88614603e-14,
        3.88520208e-11, -2.43476311e-08,  9.50992557e-06, -2.22093300e-03,
        2.82908130e-01, -1.62807870e+01,  1.71766195e+03]), array([ 1.12001971e-24, -7.91989854e-21,  2.36011784e-17, -3.88614603e-14,
        3.88520208e-11, -2.43476311e-08,  9.50992557e-06, -2.22093300e-03,
        2.82908130e-01, -1.62807870e+01,  1.71766195e+03])),

derivs:           9             8             7            6             5
1.12e-23 x - 7.128e-20 x + 1.888e-16 x - 2.72e-13 x + 2.331e-10 x
              4             3            2
 - 1.217e-07 x + 3.804e-05 x - 0.006663 x + 0.5658 x - 16.28,

y vals at point: (array([1717.66194592, 1701.66185556, 1686.21438827, ..., 1738.84192024,
       1735.21864265, 1731.40871437]), 1165),

slope at point: [-16.28078705 -15.72159567 -15.17550335 ...  -3.53128921  -3.71593226
  -3.90459934]


def draw_slope_line_at_point(fig, ind, x, y, slope_at_point, verbose=False):
    """Plot a line from an index at a specific point for x values, y values and their slopes"""
    
    
    y_low = (x[0] - x[ind]) * slope_at_point[ind] + y[ind]
    y_high = (x[-1] - x[ind]) * slope_at_point[ind] + y[ind]
    
    x_vals = [x[0], x[-1]]
    y_vals = [y_low, y_high]

    if verbose:
        print((x[0] - x[ind]))
        print(x[ind], x_vals, y_vals, y[ind],slope_at_point[ind])
    
    fig.add_trace(
            go.Scatter(
                x=x_vals, 
                y=y_vals, 
                name="Tangent at point", 
                line = dict(color='orange', width=2, dash='dash'),
            )
        )
    
    return x_vals, y_vals


fig = px.scatter(
    x=np.arange(0, len(y)),
    #x=final_df.index,
    y=final_df.nr_deaths, 
    opacity=.3,
    title='Deaths per year'
)


fig.add_traces(
    go.Scatter(
        x=np.arange(0, len(y)),
        #x=final_df.index.strftime('%Y-%m-%d').to_list(),
        y=y_value_at_point, 
        name='Polynomial regression Fit 2'))


# Replace x axis ticks with dates instead numbers
# fig.update_xaxes(
#     rangeslider_visible=True,
#     ticktext=final_df.index.strftime('%Y-%m-%d')[::10],
#     tickvals=np.arange(0, len(y))[::10],
    #tickformatstops not working with ticktextZ
# )


for pt in [750 ,1080]:
# for pt in [31]:
    draw_slope_line_at_point(
        fig, 
        x= np.arange(0, len(y)),
        #x=final_df.index,
        y = y_value_at_point,
        slope_at_point=slope_at_point,
        ind = pt)
    
    fig.add_annotation(x=pt, y=y_value_at_point[pt],
            text=f'''Slope: {slope_at_point[pt]:.2f}\t {final_df.index.strftime('%Y-%m-%d')[pt]}''',
            showarrow=True,
            arrowhead=1)


fig.update_layout(
    hovermode='x unified',
)
    
fig.show()


fig = px.scatter(
    x=only_covid.index,
    y=only_covid.covid_deaths, 
    trendline="ols",
    trendline_color_override="red",
    opacity=.5,
    title='Deaths per year'
)
fig.show()


fig = px.scatter(
    x=only_covid.index,
    y=only_covid.covid_deaths, 
    trendline="lowess",
    trendline_color_override="red",
    trendline_options=dict(frac=0.1),
    opacity=.5,
    title='Deaths per year'
)
fig.show()

# regression params not available for lowess


poly_degree = 10


y = only_covid.covid_deaths.values
x = np.arange(0, len(y))
x = x.reshape(-1, 1)

# just for checking with sklearn implementation
poly = PolynomialFeatures(degree=poly_degree, include_bias=False)
poly_features = poly.fit_transform(x)
poly_reg_model = LinearRegression()
poly_reg_model.fit(poly_features, y)


#-----
fitted_params = np.polyfit(np.arange(0, len(y)), y, poly_degree )

polynomials = np.poly1d(fitted_params)

derivatives = np.polyder(polynomials)

y_value_at_point = polynomials(x).flatten()

slope_at_point = np.polyval(derivatives, np.arange(0, len(y)))
#-----



# x_range_ordinal_poly = np.linspace(x.min(), x.max(), len(y))

# poly_pred_x = poly.fit_transform(x_range_ordinal_poly.reshape(-1, 1))


# print(f'''
# x: {len(x), x},
# y: {len(y), y},
# ''')

print(f'''
fittedparams: {fitted_params, fitted_params},

derivs: {derivatives},

y vals at point: {y_value_at_point, len(y_value_at_point)},

slope at point: {slope_at_point}''')

fittedparams: (array([-3.29416755e-13,  1.26451941e-10, -2.02479035e-08,  1.77003587e-06,
       -9.26507045e-05,  2.99527695e-03, -5.96424960e-02,  7.09907843e-01,
       -4.66671787e+00,  1.25294601e+01,  1.39709027e+01]), array([-3.29416755e-13,  1.26451941e-10, -2.02479035e-08,  1.77003587e-06,
       -9.26507045e-05,  2.99527695e-03, -5.96424960e-02,  7.09907843e-01,
       -4.66671787e+00,  1.25294601e+01,  1.39709027e+01])),

derivs:             9             8            7             6             5
-3.294e-12 x + 1.138e-09 x - 1.62e-07 x + 1.239e-05 x - 0.0005559 x
            4          3        2
 + 0.01498 x - 0.2386 x + 2.13 x - 9.333 x + 12.53,

y vals at point: (array([13.97090266, 22.4868146 , 25.17807482, 24.55934231, 22.3022525 ,
       19.45536845, 16.62054399, 14.09204724, 11.96414985, 10.21228534,
        8.75231638,  7.4819266 ,  6.30766423,  5.1607125 ,  4.00404452,
        2.83323543,  1.67285297,  0.57002568, -0.41350281, -1.20978868,
       -1.75276229, -1.98536217, -1.86531298, -1.36929271, -0.49537231,
        0.73627198,  2.2842772 ,  4.0884688 ,  6.0734584 ,  8.1531869 ,
       10.23610819, 12.23069481, 14.05094648, 15.62159384, 16.88271135,
       17.7934848 , 18.33491844, 18.51131322, 18.35040002, 17.90206911,
       17.23569723, 16.43613666, 15.59849449, 14.82189396, 14.20247212,
       13.82592783, 13.75999048, 14.04723062, 14.69867926, 15.68875999,
       16.95206751, 18.38254582, 19.83562782, 21.13389493, 22.07679833,
       22.45495252, 22.06946502, 20.75670213, 18.41880923, 15.06020257,
       10.83012866,  6.07124354,  1.37399871, -2.36356978, -3.87126782,
       -1.4382456 ,  7.1582941 ]), 67),

slope at point: [ 1.25294601e+01  5.10161060e+00  6.95528608e-01 -1.65803685e+00
 -2.68430654e+00 -2.91145774e+00 -2.71092751e+00 -2.33169590e+00
 -1.92917152e+00 -1.58926187e+00 -1.34817221e+00 -1.20843916e+00
 -1.15166889e+00 -1.14841469e+00 -1.16559480e+00 -1.17181845e+00
 -1.14095682e+00 -1.05426499e+00 -9.01332028e-01 -6.80108222e-01
 -3.96231860e-01 -6.18522935e-02  3.05878322e-01  6.86495374e-01
  1.05796699e+00  1.39838761e+00  1.68753311e+00  1.90820892e+00
  2.04733993e+00  2.09676685e+00  2.05372861e+00  1.92102383e+00
  1.70685712e+00  1.42438670e+00  1.09100042e+00  7.27355606e-01
  3.56226146e-01  1.20639674e-03 -3.14673055e-01 -5.70358814e-01
 -7.48155426e-01 -8.35009954e-01 -8.23636397e-01 -7.13416735e-01
 -5.11017275e-01 -2.30662127e-01  1.05990062e-01  4.70414455e-01
  8.28275109e-01  1.14100051e+00  1.36790420e+00  1.46886486e+00
  1.40756859e+00  1.15530372e+00  6.95284288e-01  2.74633463e-02
 -8.26218782e-01 -1.81622405e+00 -2.85953514e+00 -3.83361391e+00
 -4.57033195e+00 -4.85002765e+00 -4.39586760e+00 -2.86871468e+00
  1.37268660e-01  5.09802684e+00  1.25617435e+01]


fig = px.scatter(
    x=np.arange(0, len(y)),
    y=only_covid.covid_deaths, 
    opacity=.5,
    title='Deaths per year'
)


fig.add_traces(
    go.Scatter(
        x=np.arange(0, len(y)),
        #x=final_df.index.strftime('%Y-%m-%d').to_list(),
        y=y_value_at_point, 
        name='Polynomial regression Fit 2'))

for pt in [31]:
    draw_slope_line_at_point(
        fig, 
        x= np.arange(0, len(y)),
        #x=final_df.index,
        y = y_value_at_point,
        slope_at_point=slope_at_point,
        ind = pt)
    
    fig.add_annotation(
        x=pt, 
        y=y_value_at_point[pt],
        text=f'''Slope: {slope_at_point[pt]:.2f}\t {only_covid.index.strftime('%Y-%m-%d')[pt]}''',
        showarrow=True,
        arrowhead=1)


fig.update_layout(
    hovermode='x unified',
)
    
fig.show()


traces = []
animation_dicts = dict()


traces.append(
            go.Scatter(
                x=np.arange(0, len(y)),
                y=only_covid.covid_deaths, 
                name='Covid deaths',
                mode='lines',
                opacity=.5,
                line=dict(width=1.5)
            
            ))

# traces.append(
#             go.Scatter(
#                 x=np.arange(0, len(y)),
#                 y=only_covid.covid_deaths, 
#                 mode='lines',
#                 opacity=.5,
#                 line={'shape': 'spline', 'smoothing': 1.3}
#             ))

traces.append(
            go.Scatter(
                x=np.arange(0, len(y)),
                y=only_covid.covid_deaths,
                name='Covid deaths',
                mode='markers',
                opacity=.5,
                line=dict(width=1)
            
            ))

traces.append(
    go.Scatter(
        x=np.arange(0, len(y)),
        #x=final_df.index.strftime('%Y-%m-%d').to_list(),
        y=y_value_at_point, 
        name='Polynomial regression Fit',
                    mode='lines',
                opacity=.5,
                line=dict(width=1.5)
    ))



for pt in np.arange(0, len(y)):#[31, 60]:
    x_vals, y_vals = draw_slope_line_at_point(
        fig, 
        x= np.arange(0, len(y)),
        y = y_value_at_point, 
        slope_at_point=slope_at_point,
        ind = pt)
    
    animation_dicts[pt]= [x_vals, y_vals]
    
#     traces.append(
#             go.Scatter(
#                 x=x_vals,
#                 y=y_vals, 
#                 mode='lines',
#                 opacity=.4,
#                 line=dict(width=1.5)))
    
#     fig.add_annotation(
#         x=pt, 
#         y=y_value_at_point[pt],
#         text=f'''Slope: {slope_at_point[pt]:.2f}\t {only_covid.index.strftime('%Y-%m-%d')[pt]}''',
#         showarrow=True,
#         arrowhead=1)
    
    
frame_data = [] 
slider_steps = []

for k in range(0, len(final_df)):
        
#     frame_data.append(
#         dict(data=
        
#             [dict(
#                 type = 'scatter',
#                 x=np.arange(0, len(y))[:k],
#                 y=only_covid.covid_deaths[:k]
#                 )]
#             )
#     )


    # add slope lines
    if k in animation_dicts.keys():
        frame_data.append(
            dict(data=
                [dict(
                    type = 'scatter',
                    x=animation_dicts[k][0],
                    y=animation_dicts[k][1],
                    mode='lines',
                    line={'dash': 'dash', 'color': 'green'}
                )]
            )
        )
        
        slider_steps.append( 
            {"args": [
                frame_data[k],
                {"frame": {"duration": 300, "redraw": False},
                 "mode": "immediate",
                 "transition": {"duration": 300}}
                ],
            "label": k,
            "method": "animate"}
       )
    
    

all_frames = frame_data



frames=all_frames
    
    
    
#     dict(
#         data=[
#             dict(
#                 type = 'scatter',
#                 x=np.arange(0, len(y))[:k],
#                 y=only_covid.covid_deaths[:k]
#             ),
#             dict(
#                 type = 'scatter',
#                 x=animation_dicts[k][0],
#                 y=animation_dicts[k][1]
#             ),
#         ]
#     )
    
#     for k in range(0, len(final_df))

sliders_dict = {
    "active": 0,
    "yanchor": "top",
    "xanchor": "left",
    "currentvalue": {
        "font": {"size": 20},
        "prefix": "Week:",
        "visible": True,
        "xanchor": "right"
    },
    "transition": {"duration": 300, "easing": "cubic-in-out"},
    "pad": {"b": 10, "t": 50},
    "len": 0.9,
    "x": 0.1,
    "y": 0,
    "steps": slider_steps
}


layout = go.Layout(
        xaxis={"range": [0, len(y)], "title": "weeks"},
                    sliders=[sliders_dict],
#                    showlegend=False,
                   hovermode='x unified',
                   updatemenus=[
                        dict(
                            type='buttons', 
                            buttons=[
                                dict(
                                        label='Show tangents',
                                    method='animate',
                                    args=[None, 
                                      dict(frame=dict(duration=200, 
                                                      redraw=False),
                                                      transition=dict(duration=0),
                                                      fromcurrent=True,
                                                      mode='immediate')
                                 ]),
                                {
                                    "args": [[None], {"frame": {"duration": 0, "redraw": False},
                                                      "mode": "immediate",
                                                      "transition": {"duration": 0}}],
                                    "label": "Pause",
                                    "method": "animate"
                                }
                            ],
                            
                            direction="left",
                            pad= {"r": 10, "t": 87},
                            showactive= False,
                            x=0.1,
                            xanchor= "right",
                            y= 0,
                            yanchor= "top"
                        )
                    ]              
                  )



fig = go.Figure(
    data=traces,
    frames=frames,
    layout=layout)

fig.show()


# pio.write_html(fig, file='figure.html', auto_open=True)

	C-KALWOCHE-0	C-B00-0	C-ALTERGR65-0	C-C11-0	F-ANZ-1
0	KALW-200001	B00-1	ALTERSGR65-1	C11-1	8
1	KALW-200001	B00-1	ALTERSGR65-1	C11-2	2
2	KALW-200001	B00-1	ALTERSGR65-2	C11-1	25
3	KALW-200001	B00-1	ALTERSGR65-2	C11-2	33
4	KALW-200001	B00-2	ALTERSGR65-1	C11-1	7
...	...	...	...	...	...
41998	KALW-202223	B00-8	ALTERSGR65-2	C11-2	28
41999	KALW-202223	B00-9	ALTERSGR65-1	C11-1	24
42000	KALW-202223	B00-9	ALTERSGR65-1	C11-2	13
42001	KALW-202223	B00-9	ALTERSGR65-2	C11-1	125
42002	KALW-202223	B00-9	ALTERSGR65-2	C11-2	159

	Datum	BundeslandID	Name	BestaetigteFaelleBundeslaender	Todesfaelle	Genesen	Hospitalisierung	Intensivstation	Testungen	TestungenPCR	TestungenAntigen
0	2021-03-01T09:30:00+01:00	1	Burgenland	12657	239	11701	43	10	638575	155435	483140
1	2021-03-01T09:30:00+01:00	2	Kärnten	29225	683	27316	96	15	675557	217933	457624
2	2021-03-01T09:30:00+01:00	3	Niederösterreich	74538	1350	67900	349	82	3400756	1141984	2258772
3	2021-03-01T09:30:00+01:00	4	Oberösterreich	86409	1515	82567	120	20	2162517	546777	1615740
4	2021-03-01T09:30:00+01:00	5	Salzburg	37327	493	35318	71	15	823353	274598	548755
...	...	...	...	...	...	...	...	...	...	...	...
4845	2022-06-28T09:30:00+02:00	6	Steiermark	561729	3399	551626	102	8	21098196	5816694	15281502
4846	2022-06-28T09:30:00+02:00	7	Tirol	367338	939	361988	77	2	9590745	3845911	5744834
4847	2022-06-28T09:30:00+02:00	8	Vorarlberg	211284	549	208983	20	2	7088523	1543803	5544720
4848	2022-06-28T09:30:00+02:00	9	Wien	955399	4028	914608	199	23	65808360	58193874	7614486
4849	2022-06-28T09:30:00+02:00	10	Österreich	4403444	18768	4293115	842	47	190225970	96219579	94006391

	cal_week	state	C-ALTERGR65-0	gender	counts	year	formatted_date	date	conv_date
0	01	B00-1	ALTERSGR65-1	C11-1	8	2000	2000010	2000-01-09	730128
1	01	B00-1	ALTERSGR65-1	C11-2	2	2000	2000010	2000-01-09	730128
2	01	B00-1	ALTERSGR65-2	C11-1	25	2000	2000010	2000-01-09	730128
3	01	B00-1	ALTERSGR65-2	C11-2	33	2000	2000010	2000-01-09	730128
4	01	B00-2	ALTERSGR65-1	C11-1	7	2000	2000010	2000-01-09	730128
...	...	...	...	...	...	...	...	...	...
41998	23	B00-8	ALTERSGR65-2	C11-2	28	2022	2022230	2022-06-12	738318
41999	23	B00-9	ALTERSGR65-1	C11-1	24	2022	2022230	2022-06-12	738318
42000	23	B00-9	ALTERSGR65-1	C11-2	13	2022	2022230	2022-06-12	738318
42001	23	B00-9	ALTERSGR65-2	C11-1	125	2022	2022230	2022-06-12	738318
42002	23	B00-9	ALTERSGR65-2	C11-2	159	2022	2022230	2022-06-12	738318

	counts	conv_date
date
2000-01-09	1867	26284608
2000-01-16	1902	26284860
2000-01-23	2027	26285112
2000-01-30	1940	26285364
2000-02-06	1928	26285616
...	...	...
2022-05-15	1568	25840150
2022-05-22	1592	25840395
2022-05-29	1508	26578944
2022-06-05	1554	26579196
2022-06-12	1560	26579448

	Datum	BundeslandID	Name	BestaetigteFaelleBundeslaender	Todesfaelle	Genesen	Hospitalisierung	Intensivstation	Testungen	TestungenPCR	TestungenAntigen
9	2021-03-01T09:30:00+01:00	10	Österreich	460849	8574	432016	1353	290	15003345	5395666	9607679
19	2021-03-02T09:30:00+01:00	10	Österreich	462769	8605	433873	1427	296	15358139	5456284	9901855
29	2021-03-03T09:30:00+01:00	10	Österreich	465322	8625	435669	1415	313	15602870	5507472	10095398
39	2021-03-04T09:30:00+01:00	10	Österreich	467646	8652	437202	1425	302	15864120	5557591	10306529
49	2021-03-05T09:30:00+01:00	10	Österreich	470314	8669	439101	1421	326	16123615	5610644	10512971
...	...	...	...	...	...	...	...	...	...	...	...
4809	2022-06-24T09:30:00+02:00	10	Österreich	4370852	18757	4271277	662	42	189894351	95918261	93976090
4819	2022-06-25T09:30:00+02:00	10	Österreich	4379764	18758	4276737	677	44	189984714	96003641	93981073
4829	2022-06-26T09:30:00+02:00	10	Österreich	4386857	18760	4281819	686	46	190048498	96065222	93983276
4839	2022-06-27T09:30:00+02:00	10	Österreich	4393255	18764	4286495	777	48	190108723	96122902	93985821
4849	2022-06-28T09:30:00+02:00	10	Österreich	4403444	18768	4293115	842	47	190225970	96219579	94006391

Analysing Austrias deaths and their Covid-19 correlation¶

Datasets¶

Goal of the analysis¶

Analysis¶

Load data¶

Pandas Profiling¶

Data Cleaning¶

Clean Dates¶

Aggregate dates¶

Clean 2nd dataset¶

Merge datasets¶

Remove outliers¶

Data visualization - All deaths¶

Simple regression¶

Sklearn Implementation¶

Polynomial Regression¶

Calculate polynomial slope at certain point¶

Visualization - Covid deaths¶

Polynomial¶

Interactive tangent visualization¶

Conclusion¶

	formatted_date	deaths	date
9	2021-03-01	8574	2021-03-01
19	2021-03-02	8605	2021-03-02
29	2021-03-03	8625	2021-03-03
39	2021-03-04	8652	2021-03-04
49	2021-03-05	8669	2021-03-05
...	...	...	...
4809	2022-06-24	18757	2022-06-24
4819	2022-06-25	18758	2022-06-25
4829	2022-06-26	18760	2022-06-26
4839	2022-06-27	18764	2022-06-27
4849	2022-06-28	18768	2022-06-28

	nr_deaths	covid_deaths
count	1167.000000	67.000000
mean	1513.545844	11.268657
std	207.013936	10.300900
min	1185.000000	0.000000
25%	1392.000000	3.000000
50%	1473.000000	6.000000
75%	1578.000000	18.500000
max	3953.000000	43.000000