Skip to content

EmpiricalSimulator

A class used to simulate empirical data.

Attributes:

Name Type Description
logger Logger

A logger instance.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
class EmpiricalSimulator:
    """
    A class used to simulate empirical data.

    Attributes:
        logger (logging.Logger):
            A logger instance.

    """

    # Initialization
    def __init__(
        self,
        log_filename: Optional[os.PathLike] = os.path.join(
            FILE_PATH.parents[3], "logs/empirical_simulation.log"
        ),
    ) -> None:
        """
        Constructs all the necessary attributes for the EmpiricalSimulator object.

        Args:
            log_filename (os.PathLike, optional): The path to the log file. Defaults to os.path.join(FILE_PATH.parents[3], 'logs/empirical_simulation.log').
                Note that this default is computed once, when the class body is executed.

        """
        # Set up the logger used by every simulation method
        self.logger = _init_logger(filename=log_filename)

    # Run one simulation pass
    def iterate_simulation(
        self,
        data: pd.DataFrame,
        tax_benefit_system: TaxBenefitSystem,
        year: int,
        list_var_simul: List[str],
        list_var_exclude: Optional[List[str]] = [],
        inplace: Optional[bool] = True,
    ) -> pd.DataFrame:
        """
        Iterates a simulation.

        Every column of ``data`` that is not excluded is offered to the simulation
        as an input for ``year``; columns that the simulation rejects are logged
        and skipped. Each variable of ``list_var_simul`` is then computed and
        stored as a column of the returned DataFrame.

        Args:
            data (pd.DataFrame): The data to simulate.
            tax_benefit_system (TaxBenefitSystem): The tax benefit system.
            year (int): The year of the simulation.
            list_var_simul (List[str]): The list of variables to simulate.
            list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to []. ``None`` is treated as an empty list.
            inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

        Returns:
            (pd.DataFrame): The simulated data (``data`` itself when ``inplace`` is truthy, a copy otherwise).
        """
        # The parameter is annotated Optional: make the None case explicit instead of
        # relying on how NumPy happens to treat None. The default list is only read,
        # never mutated, so sharing it across calls is harmless.
        excluded = [] if list_var_exclude is None else list_var_exclude

        # Work on the caller's DataFrame or on an independent copy
        data_res = data if inplace else data.copy()

        # Build a default simulation sized to the number of rows
        simulation = SimulationBuilder().build_default_simulation(
            tax_benefit_system, len(data_res)
        )

        # Feed the data into the simulation.
        # NOTE: 'smic_proratise' was at one point added to the excluded variables to work
        # around the faulty conversion between monthly and annual values. This was
        # eventually dropped because remunerations are still divided by 12 and are not
        # intersected with the contract duration. This is probably something to improve
        # in the package.
        # np.setdiff1d yields the sorted, de-duplicated column names that are not excluded.
        for caracteristic in np.setdiff1d(data_res.columns, excluded):
            try:
                simulation.set_input(
                    caracteristic, year, data_res[caracteristic].to_numpy()
                )
            except Exception as e:
                # Deliberate best-effort: a column that cannot be set as an input
                # (the original note mentions 'id' and 'siren') is reported and skipped.
                self.logger.warning(
                    f"Cannot initialize {caracteristic} in the french tax benefit system : {e}"
                )
            else:
                self.logger.info(
                    f"Successfully initialized {caracteristic} in the french tax benefit system"
                )

        # Compute the requested variables (contributions and general reductions)
        for var_simul in tqdm(list_var_simul):
            data_res[var_simul] = simulation.calculate_add(var_simul, year)
            self.logger.info(f"Successfully simulated {var_simul} for period {year}")

        return data_res

    # Simulation of the prorated minimum wage (SMIC)
    def simulate_smic_proratise(
        self,
        data: pd.DataFrame,
        year: int,
        list_var_exclude: Optional[List[str]] = [],
        inplace: Optional[bool] = True,
    ) -> pd.DataFrame:
        """
        Simulates the prorated minimum wage.

        Delegates to ``iterate_simulation`` with a fresh ``FranceTaxBenefitSystem``
        and ``"smic_proratise"`` as the only simulated variable.

        Args:
            data (pd.DataFrame): The data to simulate.
            year (int): The year of the simulation.
            list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to [].
            inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

        Returns:
            (pd.DataFrame): The simulated data.
        """
        # Instantiate the French tax-benefit system
        tax_benefit_system = FranceTaxBenefitSystem()

        # Simulate the prorated SMIC for the year of the data
        return self.iterate_simulation(
            data=data,
            tax_benefit_system=tax_benefit_system,
            year=year,
            list_var_simul=["smic_proratise"],
            list_var_exclude=list_var_exclude,
            inplace=inplace,
        )

iterate_simulation

iterate_simulation(data: DataFrame, tax_benefit_system: TaxBenefitSystem, year: int, list_var_simul: List[str], list_var_exclude: Optional[List[str]] = [], inplace: Optional[bool] = True) -> DataFrame

Iterates a simulation.

Parameters:

Name Type Description Default
data DataFrame

The data to simulate.

required
tax_benefit_system TaxBenefitSystem

The tax benefit system.

required
year int

The year of the simulation.

required
list_var_simul List[str]

The list of variables to simulate.

required
list_var_exclude Optional[List[str]]

The list of variables to exclude. Defaults to [].

[]
inplace Optional[bool]

Whether to perform the simulation in place. Defaults to True.

True

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def iterate_simulation(
    self,
    data: pd.DataFrame,
    tax_benefit_system: TaxBenefitSystem,
    year: int,
    list_var_simul: List[str],
    list_var_exclude: Optional[List[str]] = [],
    inplace: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Iterates a simulation.

    Every column of ``data`` that is not excluded is offered to the simulation
    as an input for ``year``; columns that the simulation rejects are logged
    and skipped. Each variable of ``list_var_simul`` is then computed and
    stored as a column of the returned DataFrame.

    Args:
        data (pd.DataFrame): The data to simulate.
        tax_benefit_system (TaxBenefitSystem): The tax benefit system.
        year (int): The year of the simulation.
        list_var_simul (List[str]): The list of variables to simulate.
        list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to []. ``None`` is treated as an empty list.
        inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

    Returns:
        (pd.DataFrame): The simulated data (``data`` itself when ``inplace`` is truthy, a copy otherwise).
    """
    # The parameter is annotated Optional: make the None case explicit instead of
    # relying on how NumPy happens to treat None. The default list is only read,
    # never mutated, so sharing it across calls is harmless.
    excluded = [] if list_var_exclude is None else list_var_exclude

    # Work on the caller's DataFrame or on an independent copy
    data_res = data if inplace else data.copy()

    # Build a default simulation sized to the number of rows
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, len(data_res)
    )

    # Feed the data into the simulation.
    # NOTE: 'smic_proratise' was at one point added to the excluded variables to work
    # around the faulty conversion between monthly and annual values. This was
    # eventually dropped because remunerations are still divided by 12 and are not
    # intersected with the contract duration. This is probably something to improve
    # in the package.
    # np.setdiff1d yields the sorted, de-duplicated column names that are not excluded.
    for caracteristic in np.setdiff1d(data_res.columns, excluded):
        try:
            simulation.set_input(
                caracteristic, year, data_res[caracteristic].to_numpy()
            )
        except Exception as e:
            # Deliberate best-effort: a column that cannot be set as an input
            # (the original note mentions 'id' and 'siren') is reported and skipped.
            self.logger.warning(
                f"Cannot initialize {caracteristic} in the french tax benefit system : {e}"
            )
        else:
            self.logger.info(
                f"Successfully initialized {caracteristic} in the french tax benefit system"
            )

    # Compute the requested variables (contributions and general reductions)
    for var_simul in tqdm(list_var_simul):
        data_res[var_simul] = simulation.calculate_add(var_simul, year)
        self.logger.info(f"Successfully simulated {var_simul} for period {year}")

    return data_res

simulate_smic_proratise

simulate_smic_proratise(data: DataFrame, year: int, list_var_exclude: Optional[List[str]] = [], inplace: Optional[bool] = True) -> DataFrame

Simulates the prorated minimum wage.

Parameters:

Name Type Description Default
data DataFrame

The data to simulate.

required
year int

The year of the simulation.

required
list_var_exclude Optional[List[str]]

The list of variables to exclude. Defaults to [].

[]
inplace Optional[bool]

Whether to perform the simulation in place. Defaults to True.

True

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def simulate_smic_proratise(
    self,
    data: pd.DataFrame,
    year: int,
    list_var_exclude: Optional[List[str]] = [],
    inplace: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Simulates the prorated minimum wage.

    Thin wrapper around ``iterate_simulation``: it builds a
    ``FranceTaxBenefitSystem`` and asks for the single variable
    ``"smic_proratise"`` for the given year.

    Args:
        data (pd.DataFrame): The data to simulate.
        year (int): The year of the simulation.
        list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to [].
        inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

    Returns:
        (pd.DataFrame): The simulated data.
    """
    # The French tax-benefit system is created first, as a fresh instance per call
    french_system = FranceTaxBenefitSystem()

    # Gather the delegation arguments; only the prorated SMIC is simulated
    simulation_kwargs = dict(
        data=data,
        tax_benefit_system=french_system,
        year=year,
        list_var_simul=["smic_proratise"],
        list_var_exclude=list_var_exclude,
        inplace=inplace,
    )

    return self.iterate_simulation(**simulation_kwargs)