Skip to content

build_data_evol_ct

build_data_evol_ct(data_source: DataFrame, col_new_ct: str, to_concat: bool, col_ct: str = 'salaire_super_brut')

Creates variables of interest concerning the evolution of the cost of labor.

Parameters:

Name Type Description Default
data_source DataFrame

The source data.

required
col_new_ct str

The column of the new cost of labor.

required
to_concat bool

Whether to concatenate the result to the source data.

required
col_ct str

The column of the cost of labor. Defaults to 'salaire_super_brut'.

'salaire_super_brut'

Returns:

Type Description
DataFrame

The data with the cost of labor evolution variables.

Source code in bozio_wasmer_simulations/description/datasets.py
def build_data_evol_ct(
    data_source: pd.DataFrame,
    col_new_ct: str,
    to_concat: bool,
    col_ct: str = "salaire_super_brut",
) -> pd.DataFrame:
    """
    Creates variables of interest concerning the evolution of the cost of labor.

    For a suffix derived from ``col_new_ct`` (see below), the following
    columns are computed:

    - ``diff_ct_<suffix>``: row-level difference ``col_new_ct - col_ct``.
    - ``evol_ct_<suffix>``: row-level relative change
      ``(col_new_ct - col_ct) / col_ct``.
    - ``evol_ms_<suffix>``: per-``siren`` ratio of the weighted sum of the
      differences to the weighted sum of ``col_ct``, broadcast back to every
      row of that ``siren``.

    The suffix is the part of ``col_new_ct`` that follows the first
    occurrence of ``col_ct`` plus one separator character, e.g.
    ``"salaire_super_brut_ref"`` gives ``"ref"``.

    Args:
        data_source (pd.DataFrame): The source data. Must contain the columns
            ``"siren"``, ``"weights"``, ``col_new_ct`` and ``col_ct``.
        col_new_ct (str): The column of the new cost of labor.
        to_concat (bool): Whether to concatenate the result to the source data.
        col_ct (str, optional): The column of the cost of labor. Defaults to 'salaire_super_brut'.

    Returns:
        (pd.DataFrame): The data with the cost of labor evolution variables.
            If ``to_concat`` is True, ``data_source`` with the ``diff_ct``,
            ``evol_ct`` and ``evol_ms`` columns appended, row for row, on the
            index of ``data_source``. Otherwise a new frame (with the default
            index produced by the merge) holding ``siren``, ``weights``, both
            cost columns, the three variables above and the intermediate
            ``diff_pond_ct_<suffix>`` and ``pond_ct`` columns.

    Raises:
        KeyError: If one of the required columns is missing from ``data_source``.
    """
    # Independent copy of the columns of interest, so the caller's frame is
    # never mutated.
    data_evol_ct = data_source[["siren", "weights", col_new_ct, col_ct]].copy()

    # Suffix used to name the output columns.
    # NOTE: when ``col_ct`` is not a substring of ``col_new_ct``, ``find``
    # returns -1 and the slice starts at ``len(col_ct)``; this historical
    # behaviour is kept as is so existing output column names do not change.
    suffix = col_new_ct[(col_new_ct.find(col_ct) + len(col_ct) + 1) :]

    # Difference of the cost of labor, computed once and reused below.
    diff_ct = data_evol_ct[col_new_ct] - data_evol_ct[col_ct]
    data_evol_ct[f"diff_ct_{suffix}"] = diff_ct

    # Relative evolution of the cost of labor.
    data_evol_ct[f"evol_ct_{suffix}"] = diff_ct / data_evol_ct[col_ct]

    # Weighted quantities needed for the firm-level (siren) payroll evolution.
    data_evol_ct[f"diff_pond_ct_{suffix}"] = diff_ct.multiply(
        other=data_evol_ct["weights"]
    )
    data_evol_ct["pond_ct"] = data_evol_ct[col_ct].multiply(
        other=data_evol_ct["weights"]
    )

    # Evolution of the payroll per siren: weighted difference over weighted
    # initial cost. The result is named explicitly rather than relying on the
    # implicit ``0`` label that ``reset_index`` gives an unnamed Series.
    totals_siren = data_evol_ct.groupby("siren")[
        [f"diff_pond_ct_{suffix}", "pond_ct"]
    ].sum()
    data_effet_siren = (
        totals_siren[f"diff_pond_ct_{suffix}"]
        .divide(other=totals_siren["pond_ct"])
        .rename(f"evol_ms_{suffix}")
        .reset_index()
    )

    # Attach the per-siren payroll evolution to every row.
    data_evol_ct = pd.merge(
        left=data_evol_ct,
        right=data_effet_siren,
        on="siren",
        how="left",
        validate="many_to_one",
    )

    if not to_concat:
        return data_evol_ct

    # Keep only the newly created variables of interest.
    new_columns = data_evol_ct.drop(
        columns=[
            "siren",
            "weights",
            col_new_ct,
            col_ct,
            f"diff_pond_ct_{suffix}",
            "pond_ct",
        ]
    )
    # ``pd.merge`` returns a fresh RangeIndex, while ``pd.concat`` aligns on
    # index labels. The left many-to-one merge preserves the row order and
    # count, so the source index is restored positionally; without this, a
    # source frame with a non-default index would be misaligned (NaN-padded
    # and extra rows).
    new_columns.index = data_source.index

    # Concatenation to the original dataset.
    return pd.concat([data_source, new_columns], axis=1, join="outer")