Skip to content

CoreSimulation

A class used to build the core simulation data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
class CoreSimulation(EmpiricalSimulator):
    """
    A class used to build the core simulation data.

    The class loads (or receives) the DADS data, preprocesses it, adds
    weights and simulates the variables listed in ``params["VARIABLES"]``.

    Attributes:
        project (str): The CASD project name, forwarded to ``load_dads``.
        data_dads (pd.DataFrame): The working dataset. It is created by
            ``build_data_dads`` and updated by the subsequent steps.
    """

    # Initialisation
    def __init__(
        self,
        project: str,
        log_filename: Optional[os.PathLike] = os.path.join(
            FILE_PATH.parents[3], "logs/core_simulation.log"
        ),
    ) -> None:
        """
        Constructs all the necessary attributes for the CoreSimulation object.

        Args:
            project (str): The CASD project name.
            log_filename (os.PathLike, optional): The path to the log file. Defaults to os.path.join(FILE_PATH.parents[3], 'logs/core_simulation.log').

        Returns:
            None
        """
        # CASD project
        self.project = project
        # The parent class receives the log file name (logger initialisation)
        super().__init__(log_filename=log_filename)

    # Helper importing the zoning of the rural revitalization zones (ZRR)
    @property
    def zonage_zrr(self) -> List[str]:
        """
        Imports the rural revitalization zones (ZRR) zoning.

        The Excel file is read again on every access to the property.

        Returns:
            (List[str]): The list of rural revitalization zones.
        """
        # Data import
        data_zonage_zrr = pd.read_excel(
            os.path.join(
                FILE_PATH.parents[3], "data/diffusion-zonages-zrr-cog2021.xls"
            ),
            skiprows=5,
            dtype={"CODGEO": str},
        )
        # Keep the codes of the fully or partially classified municipalities
        list_zonage_zrr = data_zonage_zrr.loc[
            data_zonage_zrr["ZRR_SIMP"].isin(
                ["C - Classée en ZRR", "P - Commune partiellement classée en ZRR"]
            ),
            "CODGEO",
        ].tolist()

        return list_zonage_zrr

    # Helper importing the zoning of the defense restructuring zones (ZRD)
    @property
    def zonage_zrd(self) -> List[str]:
        """
        Imports the defense restructuring zones (ZRD) zoning.

        The Excel file is read again on every access to the property.

        Returns:
            (List[str]): The list of defense restructuring zones.
        """
        # Data import (every listed code is kept)
        list_zonage_zrd = pd.read_excel(
            os.path.join(FILE_PATH.parents[3], "data/diffusion-zonages-zrd-2020.xls"),
            skiprows=5,
            dtype={"CODGEO": str},
        )["CODGEO"].tolist()

        return list_zonage_zrd

    # Helper building the list of DADS columns
    def columns_dads(self, year: int) -> List[str]:
        """
        Builds the columns for the DADS data.

        A new list is returned on every call so that the shared ``params``
        configuration is never modified.

        Args:
            year (int): The year.

        Returns:
            (List[str]): The list of columns.
        """
        # Copy of the variables to keep at import time: extending the
        # configuration list itself would leak the 2022 columns into every
        # later call and duplicate them on repeated calls
        columns = list(params["DADS"]["COLONNES"])
        # The value-sharing bonus columns are only added for 2022
        if year == 2022:
            columns.extend(params["DADS"]["COLONNES_2022"])
        return columns

    # Helper building the DADS data
    def build_data_dads(
        self, year: int, data: Optional[pd.DataFrame] = None
    ) -> None:
        """
        Builds the DADS data.

        Args:
            year (int): The year.
            data (Optional[pd.DataFrame], optional): The data. When None, the data is loaded with ``_init_data_dads``. Defaults to None.

        Returns:
            None
        """
        if data is not None:
            self._build_data_dads_from_dataframe(data=data, year=year)
        else:
            self.data_dads = self._init_data_dads(year=year)

    # Helper building the DADS data from a DataFrame
    def _build_data_dads_from_dataframe(self, data: pd.DataFrame, year: int) -> None:
        """
        Builds the DADS data from a DataFrame.

        Args:
            data (pd.DataFrame): The data.
            year (int): The year.

        Returns:
            None

        Raises:
            ValueError: If expected columns are missing from ``data``.
        """
        # Expected variables that are absent from the given dataset
        missing_variables = np.setdiff1d(
            self.columns_dads(year=year), data.columns.tolist()
        ).tolist()
        if missing_variables:
            message = f"Given DataFrame should contain {missing_variables} as columns"
            # Logging
            self.logger.error(message)
            # Error
            raise ValueError(message)

        self.data_dads = data
        # Logging
        self.logger.info("Successfully build data_dads")

    # Helper importing and preprocessing the DADS
    def _init_data_dads(self, year: int) -> pd.DataFrame:
        """
        Imports and preprocesses the DADS data.

        Args:
            year (int): The year.

        Returns:
            (pd.DataFrame): The preprocessed DADS data.
        """
        # Row filter (main jobs of the vintage year)
        filter_dads = [("annee", "==", f"{year}"), ("pps", "==", "1")]

        # Data loading
        data_dads = load_dads(
            project=self.project,
            year=year,
            columns=self.columns_dads(year=year),
            filters=filter_dads,
        )

        # Age construction: year minus birth year
        data_dads["age"] = -(data_dads["annee_naiss"].subtract(other=year))
        # Missing ages are filled as if the birth year were 1970
        data_dads["age"] = data_dads["age"].fillna(year - 1970)

        # Scope restriction using the exclusion lists and age bounds of params["CHAMP"]
        in_scope = (
            (~data_dads["domempl_empl"].isin(params["CHAMP"]["DOMEMPL_EXCLUDE"]))
            & (data_dads["age"] >= int(params["CHAMP"]["AGE_MIN"]))
            & (data_dads["age"] <= int(params["CHAMP"]["AGE_MAX"]))
            & (~data_dads["dept"].isin(params["CHAMP"]["DEPT_EXCLUDE"]))
        )

        # Identifier construction: the index is renumbered, then exposed as
        # the "id" column. Non-inplace calls avoid mutating a filtered slice.
        data_dads = (
            data_dads.loc[in_scope]
            .reset_index(drop=True)
            .reset_index(drop=False, names="id")
        )

        # Logging
        self.logger.info("Successfully build data_dads")

        return data_dads

    # Helper preprocessing the DADS before plugging them into openfisca
    def preprocess_dads_simulation(self, year: int) -> None:
        """
        Preprocesses the DADS data for simulation.

        Args:
            year (int): The year.

        Returns:
            None
        """
        # Preprocessing for the general reliefs
        self.data_dads = preprocess_dads_openfisca_ag(
            data_dads=self.data_dads,
            year=year,
            list_zonage_zrr=self.zonage_zrr,
            list_zonage_zrd=self.zonage_zrd,
        )

        # Drop the variables that are useless for the simulations: every
        # imported column (plus two extra ones) not listed in the KEEP parameter
        self.data_dads.drop(
            np.setdiff1d(
                self.columns_dads(year=year) + ["pcs_2", "date_fin_contrat"],
                params["PREPROCESSING"]["KEEP"],
            ),
            axis=1,
            inplace=True,
            errors="ignore",
        )

        # Logging
        self.logger.info(
            "Successfully preprocessed data_dads to connect it with openfisca"
        )

    # Helper adding the weights
    def add_weights(self, year_data: int, year_simul: int) -> None:
        """
        Adds weights to the DADS data.

        When the data year differs from the simulation year, the wages are
        first rescaled so that they keep the same ratio to the prorated SMIC.

        Args:
            year_data (int): The year of the data.
            year_simul (int): The year of the simulation.

        Returns:
            None
        """
        # Simulation of the prorated SMIC for the data year
        self.data_dads = self.simulate_smic_proratise(
            data=self.data_dads, year=year_data, list_var_exclude=[], inplace=True
        )

        # If the data year differs from the simulation year, the wages are
        # updated so that they correspond to the same SMIC level
        if year_data != year_simul:
            # Rename the simulated column
            self.data_dads.rename(
                {"smic_proratise": f"smic_proratise_{year_data}"}, axis=1, inplace=True
            )
            # Simulation of the prorated SMIC for the simulation year
            self.data_dads = self.simulate_smic_proratise(
                data=self.data_dads, year=year_simul, list_var_exclude=[], inplace=True
            )
            # Wage expressed as a proportion of the data-year prorated SMIC
            self.data_dads["salaire_brut_smic"] = (
                self.data_dads[["salaire_de_base", "remuneration_apprenti"]].sum(axis=1)
                / self.data_dads[f"smic_proratise_{year_data}"]
            )
            # Update of the remunerations (only the strictly positive ones)
            self.data_dads["salaire_de_base"] = np.where(
                self.data_dads["salaire_de_base"] > 0,
                self.data_dads["salaire_brut_smic"] * self.data_dads["smic_proratise"],
                0,
            )
            self.data_dads["remuneration_apprenti"] = np.where(
                self.data_dads["remuneration_apprenti"] > 0,
                self.data_dads["salaire_brut_smic"] * self.data_dads["smic_proratise"],
                0,
            )
            # Drop the prorated SMIC initially computed
            self.data_dads.drop(f"smic_proratise_{year_data}", axis=1, inplace=True)
        # Temporary gross wage column
        self.data_dads["brut_s"] = self.data_dads[
            ["salaire_de_base", "remuneration_apprenti"]
        ].sum(axis=1)
        # Weights
        self.data_dads = add_weights_eqtp_accos(
            data_dads=self.data_dads,
            year=year_simul,
            var_eqtp="eqtp",
            var_sal_brut="brut_s",
            var_smic_proratise="smic_proratise",
        )
        # Drop the temporary gross wage column
        self.data_dads.drop("brut_s", axis=1, inplace=True)

        # Logging
        self.logger.info("Successfully added accoss weights to data_dads")

    # Helper running the simulation
    def simulate(self, year: int) -> None:
        """
        Simulates the data.

        Args:
            year (int): The year.

        Returns:
            None
        """
        # The base wage and the prorated SMIC are monthly variables in
        # Openfisca whereas the DADS variables are annual.
        # Both variables have set_input=set_input_divide_by_period, but the
        # prorated SMIC is computed taking the contract duration into account.
        # If a prorated SMIC is simulated first and turned into an annual
        # variable, both quantities are divided by 12, whereas they should be
        # intersected with the contract duration.
        # The ratio smic_proratise/salaire_de_base (or its inverse) remains
        # correct in that case.
        # Simulation of the prorated SMIC (only when not already available)
        if "smic_proratise" not in self.data_dads.columns:
            self.data_dads = self.simulate_smic_proratise(
                data=self.data_dads, year=year, list_var_exclude=[], inplace=True
            )
        # Variables to simulate
        list_var_simul = np.setdiff1d(params["VARIABLES"], ["smic_proratise"])
        # Tax and benefit system
        tax_benefit_system = FranceTaxBenefitSystem()
        # Simulation
        self.data_dads = self.iterate_simulation(
            data=self.data_dads,
            tax_benefit_system=tax_benefit_system,
            year=year,
            list_var_simul=list_var_simul,
            list_var_exclude=[],
            inplace=True,
        )
        # Postprocessing of the simulated variables
        self.data_dads = preprocess_simulated_variables(data=self.data_dads)
        # Rename the work quota to work around the annual/monthly handling of
        # the variable in openfisca
        self.data_dads.rename({"eqtp": "quotite_de_travail"}, axis=1, inplace=True)

        # Logging
        self.logger.info(
            f"Successfully simulated {list_var_simul} on data_dads observations"
        )

    # Method building the dataset with the simulated variables
    def build(
        self,
        year_data: int,
        year_simul: int,
        data: Optional[pd.DataFrame] = None,
    ) -> pd.DataFrame:
        """
        Builds the simulation data.

        Args:
            year_data (int): The year of the data.
            year_simul (int): The year of the simulation.
            data (Optional[pd.DataFrame], optional): The data. Defaults to None.

        Returns:
            (pd.DataFrame): The simulation data.
        """
        # Dataset loading
        self.build_data_dads(data=data, year=year_data)
        # Preprocessing
        self.preprocess_dads_simulation(year=year_data)
        # Weights
        self.add_weights(year_data=year_data, year_simul=year_simul)
        # Simulation of the variables
        self.simulate(year=year_simul)

        # Weighted totals of the key variables (only used for logging)
        aggregated_numbers = (
            self.data_dads[
                [
                    "salaire_de_base",
                    "remuneration_apprenti",
                    "salaire_super_brut",
                    "salaire_super_brut_hors_allegements",
                    "exonerations_et_allegements",
                    "exoneration_cotisations_employeur_apprenti",
                    "exoneration_cotisations_employeur_tode",
                    "exoneration_cotisations_employeur_zrd",
                    "exoneration_cotisations_employeur_zrr",
                    "exoneration_cotisations_employeur_jei",
                    "exoneration_cotisations_employeur_stagiaire",
                    "allegement_general",
                    "allegement_cotisation_maladie",
                    "allegement_cotisation_allocations_familiales",
                    "versement_transport",
                    "prime_partage_valeur_exoneree",
                ]
            ]
            .multiply(other=self.data_dads["weights"], axis=0)
            .sum()
        )
        # Logging
        self.logger.info("Successfully build simulated DataFrame")
        self.logger.info(aggregated_numbers.to_string())

        return self.data_dads

zonage_zrd property

zonage_zrd: List[str]

Imports the defense restructuring zones (ZRD) zoning.

Returns:

Type Description
List[str]

The list of defense restructuring zones.

zonage_zrr property

zonage_zrr: List[str]

Imports the rural revitalization zones (ZRR) zoning.

Returns:

Type Description
List[str]

The list of rural revitalization zones.

add_weights

add_weights(year_data: int, year_simul: int) -> None

Adds weights to the DADS data.

Parameters:

Name Type Description Default
year_data int

The year of the data.

required
year_simul int

The year of the simulation.

required

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def add_weights(self, year_data: int, year_simul: int) -> None:
    """
    Adds weights to the DADS data.

    The prorated SMIC is simulated for the data year. When the simulation
    year is different, the wages are rescaled so that their ratio to the
    prorated SMIC is kept, then the weights are added.

    Args:
        year_data (int): The year of the data.
        year_simul (int): The year of the simulation.

    Returns:
        None
    """
    wage_columns = ["salaire_de_base", "remuneration_apprenti"]

    # Prorated SMIC of the data year
    self.data_dads = self.simulate_smic_proratise(
        data=self.data_dads, year=year_data, list_var_exclude=[], inplace=True
    )

    # Wages are only rescaled when the two years differ
    if year_data != year_simul:
        smic_data_year = f"smic_proratise_{year_data}"
        # Set the data-year SMIC aside under a year-specific name
        self.data_dads.rename(columns={"smic_proratise": smic_data_year}, inplace=True)
        # Prorated SMIC of the simulation year
        self.data_dads = self.simulate_smic_proratise(
            data=self.data_dads, year=year_simul, list_var_exclude=[], inplace=True
        )
        # Total wage as a share of the data-year prorated SMIC
        total_wage = self.data_dads[wage_columns].sum(axis=1)
        self.data_dads["salaire_brut_smic"] = total_wage / self.data_dads[smic_data_year]
        # Each strictly positive remuneration is replaced by the rescaled
        # amount, the other ones are set to 0
        for column in wage_columns:
            self.data_dads[column] = np.where(
                self.data_dads[column] > 0,
                self.data_dads["salaire_brut_smic"] * self.data_dads["smic_proratise"],
                0,
            )
        # The data-year SMIC is no longer needed
        self.data_dads.drop(columns=smic_data_year, inplace=True)

    # Temporary gross wage used by the weighting step
    self.data_dads["brut_s"] = self.data_dads[wage_columns].sum(axis=1)
    self.data_dads = add_weights_eqtp_accos(
        data_dads=self.data_dads,
        year=year_simul,
        var_eqtp="eqtp",
        var_sal_brut="brut_s",
        var_smic_proratise="smic_proratise",
    )
    self.data_dads.drop(columns="brut_s", inplace=True)

    # Logging
    self.logger.info("Successfully added accoss weights to data_dads")

build

build(year_data: int, year_simul: int, data: Optional[Union[DataFrame, None]] = None) -> DataFrame

Builds the simulation data.

Parameters:

Name Type Description Default
year_data int

The year of the data.

required
year_simul int

The year of the simulation.

required
data Optional[Union[DataFrame, None]]

The data. Defaults to None.

None

Returns:

Type Description
DataFrame

The simulation data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def build(
    self,
    year_data: int,
    year_simul: int,
    data: Optional[Union[pd.DataFrame, None]] = None,
) -> pd.DataFrame:
    """
    Builds the simulation data.

    Runs, in order, the loading, preprocessing, weighting and simulation
    steps, then logs the weighted totals of a fixed set of variables.

    Args:
        year_data (int): The year of the data.
        year_simul (int): The year of the simulation.
        data (Optional[Union[pd.DataFrame, None]], optional): The data. Defaults to None.

    Returns:
        (pd.DataFrame): The simulation data.
    """
    # Pipeline: loading, preprocessing, weighting, simulation
    self.build_data_dads(data=data, year=year_data)
    self.preprocess_dads_simulation(year=year_data)
    self.add_weights(year_data=year_data, year_simul=year_simul)
    self.simulate(year=year_simul)

    # Variables whose weighted totals are reported in the log
    reported_variables = [
        "salaire_de_base",
        "remuneration_apprenti",
        "salaire_super_brut",
        "salaire_super_brut_hors_allegements",
        "exonerations_et_allegements",
        "exoneration_cotisations_employeur_apprenti",
        "exoneration_cotisations_employeur_tode",
        "exoneration_cotisations_employeur_zrd",
        "exoneration_cotisations_employeur_zrr",
        "exoneration_cotisations_employeur_jei",
        "exoneration_cotisations_employeur_stagiaire",
        "allegement_general",
        "allegement_cotisation_maladie",
        "allegement_cotisation_allocations_familiales",
        "versement_transport",
        "prime_partage_valeur_exoneree",
    ]
    # Row-wise weighting followed by a column-wise sum
    weighted_values = self.data_dads[reported_variables].mul(
        self.data_dads["weights"], axis=0
    )
    aggregated_numbers = weighted_values.sum()

    # Logging
    self.logger.info("Successfully build simulated DataFrame")
    self.logger.info(aggregated_numbers.to_string())

    return self.data_dads

build_data_dads

build_data_dads(year: int, data: Optional[Union[DataFrame, None]] = None) -> None

Builds the DADS data.

Parameters:

Name Type Description Default
year int

The year.

required
data Optional[Union[DataFrame, None]]

The data. Defaults to None.

None

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def build_data_dads(
    self, year: int, data: Optional[Union[pd.DataFrame, None]] = None
) -> None:
    """
    Builds the DADS data.

    Without a DataFrame, the data is loaded through ``_init_data_dads`` and
    stored in ``self.data_dads``. Otherwise the given DataFrame is handed to
    ``_build_data_dads_from_dataframe``.

    Args:
        year (int): The year.
        data (Optional[Union[pd.DataFrame, None]], optional): The data. Defaults to None.

    Returns:
        None
    """
    # No DataFrame supplied: load the data for the requested year
    if data is None:
        self.data_dads = self._init_data_dads(year=year)
        return

    # A DataFrame was supplied: delegate to the dedicated helper
    self._build_data_dads_from_dataframe(data=data, year=year)

columns_dads

columns_dads(year: int) -> List[str]

Builds the columns for the DADS data.

Parameters:

Name Type Description Default
year int

The year.

required

Returns:

Type Description
List[str]

The list of columns.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def columns_dads(self, year: int) -> List[str]:
    """
    Builds the columns for the DADS data.

    A new list is returned on every call so that the shared ``params``
    configuration is never modified.

    Args:
        year (int): The year.

    Returns:
        (List[str]): The list of columns.
    """
    # Copy of the variables to keep at import time: extending the
    # configuration list itself would leak the 2022 columns into every later
    # call and duplicate them on repeated calls
    columns = list(params["DADS"]["COLONNES"])
    # The value-sharing bonus columns are only added for 2022
    if year == 2022:
        columns.extend(params["DADS"]["COLONNES_2022"])
    return columns

iterate_simulation

iterate_simulation(data: DataFrame, tax_benefit_system: TaxBenefitSystem, year: int, list_var_simul: List[str], list_var_exclude: Optional[List[str]] = [], inplace: Optional[bool] = True) -> DataFrame

Iterates a simulation.

Parameters:

Name Type Description Default
data DataFrame

The data to simulate.

required
tax_benefit_system FranceTaxBenefitSystem

The tax benefit system.

required
year int

The year of the simulation.

required
list_var_simul List[str]

The list of variables to simulate.

required
list_var_exclude Optional[List[str]]

The list of variables to exclude. Defaults to [].

[]
inplace Optional[bool]

Whether to perform the simulation in place. Defaults to True.

True

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def iterate_simulation(
    self,
    data: pd.DataFrame,
    tax_benefit_system: TaxBenefitSystem,
    year: int,
    list_var_simul: List[str],
    list_var_exclude: Optional[List[str]] = [],
    inplace: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Iterates a simulation.

    Every column of the data (except the excluded ones) is offered as an
    input to the simulation. Columns that cannot be set are logged as
    warnings and skipped. Each requested variable is then computed and
    stored as a column of the returned data.

    Args:
        data (pd.DataFrame): The data to simulate.
        tax_benefit_system (FranceTaxBenefitSystem): The tax benefit system.
        year (int): The year of the simulation.
        list_var_simul (List[str]): The list of variables to simulate.
        list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to [].
        inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

    Returns:
        (pd.DataFrame): The simulated data.
    """
    # Work on the given object, or on an independent copy when requested
    data_res = data if inplace else data.copy()

    # One simulated entity per row of the data
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, len(data_res)
    )

    # Inputs of the simulation
    # Note: adding 'smic_proratise' to the excluded variables was considered
    # to work around the monthly/annual conversion issue. It was dropped
    # because remunerations stay divided by 12 and are not intersected with
    # the contract duration. This is probably a point to improve in the package.
    input_columns = np.setdiff1d(data_res.columns, list_var_exclude)
    for column in input_columns:
        try:
            simulation.set_input(column, year, data_res[column].to_numpy())
            # Logging
            self.logger.info(
                f"Successfully initialized {column} in the french tax benefit system"
            )
        except Exception as err:
            # Best effort: a column that cannot be set is only reported
            self.logger.warning(
                f"Cannot initialize {column} in the french tax benefit system : {err}"
            )

    # Computation of the requested variables
    for var_simul in tqdm(list_var_simul):
        data_res[var_simul] = simulation.calculate_add(var_simul, year)
        # Logging
        self.logger.info(f"Successfully simulated {var_simul} for period {year}")

    return data_res

preprocess_dads_simulation

preprocess_dads_simulation(year: int) -> None

Preprocesses the DADS data for simulation.

Parameters:

Name Type Description Default
year int

The year.

required

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def preprocess_dads_simulation(self, year: int) -> None:
    """
    Preprocesses the DADS data for simulation.

    The data goes through ``preprocess_dads_openfisca_ag`` with the ZRR and
    ZRD zonings, then the imported columns that are not listed in
    ``params["PREPROCESSING"]["KEEP"]`` are dropped.

    Args:
        year (int): The year.

    Returns:
        None
    """
    # Preprocessing for the general reliefs
    self.data_dads = preprocess_dads_openfisca_ag(
        data_dads=self.data_dads,
        year=year,
        list_zonage_zrr=self.zonage_zrr,
        list_zonage_zrd=self.zonage_zrd,
    )

    # Columns that are candidates for removal: the imported ones plus two extras
    removable_columns = self.columns_dads(year=year) + ["pcs_2", "date_fin_contrat"]
    # Only the candidates that are not explicitly kept are dropped
    columns_to_drop = np.setdiff1d(
        removable_columns, params["PREPROCESSING"]["KEEP"]
    )
    # Columns absent from the data are silently ignored
    self.data_dads.drop(columns=columns_to_drop, inplace=True, errors="ignore")

    # Logging
    self.logger.info(
        "Successfully preprocessed data_dads to connect it with openfisca"
    )

simulate

simulate(year: int) -> None

Simulates the data.

Parameters:

Name Type Description Default
year int

The year.

required

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def simulate(self, year: int) -> None:
    """
    Simulates the data.

    The prorated SMIC is simulated first when it is not already a column of
    the data, then every other variable of ``params["VARIABLES"]`` is
    simulated and postprocessed.

    Args:
        year (int): The year.

    Returns:
        None
    """
    # The base wage and the prorated SMIC are monthly variables in Openfisca
    # whereas the DADS variables are annual.
    # Both variables have set_input=set_input_divide_by_period, but the
    # prorated SMIC is computed taking the contract duration into account.
    # If a prorated SMIC is simulated first and turned into an annual
    # variable, both quantities are divided by 12, whereas they should be
    # intersected with the contract duration.
    # The ratio smic_proratise/salaire_de_base (or its inverse) remains
    # correct in that case.
    smic_missing = "smic_proratise" not in self.data_dads.columns
    if smic_missing:
        self.data_dads = self.simulate_smic_proratise(
            data=self.data_dads, year=year, list_var_exclude=[], inplace=True
        )

    # Variables to simulate (the prorated SMIC is handled above)
    list_var_simul = np.setdiff1d(params["VARIABLES"], ["smic_proratise"])

    # Simulation on a fresh tax and benefit system
    self.data_dads = self.iterate_simulation(
        data=self.data_dads,
        tax_benefit_system=FranceTaxBenefitSystem(),
        year=year,
        list_var_simul=list_var_simul,
        list_var_exclude=[],
        inplace=True,
    )

    # Postprocessing of the simulated variables
    self.data_dads = preprocess_simulated_variables(data=self.data_dads)

    # Rename the work quota to work around the annual/monthly handling of the
    # variable in openfisca
    self.data_dads.rename(columns={"eqtp": "quotite_de_travail"}, inplace=True)

    # Logging
    self.logger.info(
        f"Successfully simulated {list_var_simul} on data_dads observations"
    )

simulate_smic_proratise

simulate_smic_proratise(data: DataFrame, year: int, list_var_exclude: Optional[List[str]] = [], inplace: Optional[bool] = True) -> DataFrame

Simulates the prorated minimum wage.

Parameters:

Name Type Description Default
data DataFrame

The data to simulate.

required
year int

The year of the simulation.

required
list_var_exclude Optional[List[str]]

The list of variables to exclude. Defaults to [].

[]
inplace Optional[bool]

Whether to perform the simulation in place. Defaults to True.

True

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def simulate_smic_proratise(
    self,
    data: pd.DataFrame,
    year: int,
    list_var_exclude: Optional[List[str]] = [],
    inplace: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Simulates the prorated minimum wage.

    Thin wrapper around ``iterate_simulation`` that simulates the single
    variable ``smic_proratise`` on a new ``FranceTaxBenefitSystem``.

    Args:
        data (pd.DataFrame): The data to simulate.
        year (int): The year of the simulation.
        list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to [].
        inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

    Returns:
        (pd.DataFrame): The simulated data.
    """
    # French tax and benefit system used for this simulation only
    french_system = FranceTaxBenefitSystem()

    # Only the prorated SMIC is simulated, for the requested year
    return self.iterate_simulation(
        data=data,
        tax_benefit_system=french_system,
        year=year,
        list_var_simul=["smic_proratise"],
        list_var_exclude=list_var_exclude,
        inplace=inplace,
    )

    return data