Skip to content

CaptationMarginaleSimulator

A class for simulating marginal capture rates.

This class inherits from TheoreticalSimulator and CoreSimulation and provides methods for calculating the marginal capture rate, preprocessing the gross salary, building the columns for the DADS data, preprocessing the DADS data for simulation, simulating the marginal capture rate, simulating a reform, simulating multiple reforms, building the weights for the simulation, building the dataset, and calculating a synthetic rate.

Attributes:

Name Type Description
logger Logger

A logger for logging messages.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
class CaptationMarginaleSimulator(TheoreticalSimulator, CoreSimulation):
    """
    A class for simulating marginal capture rates.

    This class inherits from TheoreticalSimulator and CoreSimulation and provides methods for calculating the marginal capture rate,
    preprocessing the gross salary, building the columns for the DADS data, preprocessing the DADS data for simulation,
    simulating the marginal capture rate, simulating a reform, simulating multiple reforms, building the weights for the simulation,
    building the dataset, and calculating a synthetic rate.

    Attributes:
        logger (logging.Logger):
            A logger for logging messages.
    """

    # Initialisation
    def __init__(
        self,
        project: str,
        log_filename: Optional[os.PathLike] = os.path.join(
            FILE_PATH.parents[3], "logs/captation_marginale_simulation.log"
        ),
    ) -> None:
        """
        Initializes a CaptationMarginaleSimulator by initializing both parent classes.

        Args:
            project (str): The name of the CASD project
            log_filename (os.PathLike, optional): The path to the log file. Defaults to os.path.join(FILE_PATH.parents[3], 'logs/captation_marginale_simulation.log').

        """
        # Each parent initializer is invoked explicitly (no cooperative super() chain);
        # both receive the same log file path, and only CoreSimulation receives the project.
        TheoreticalSimulator.__init__(self, log_filename=log_filename)
        CoreSimulation.__init__(self, project=project, log_filename=log_filename)


    # Fonction auxiliaire de calcul du taux de captation marginal
    def _calculate_taux_captation_marginal(
        self, data: pd.DataFrame, name: Union[str, None]
    ) -> pd.DataFrame:
        """
        Calculates the marginal capture rate.

        Args:
            data (pd.DataFrame):
                The input data.
            name (Union[str, None]):
                The name of the scenario.

        Returns:
            (pd.DataFrame): The input data with the marginal capture rate calculated.
        """
        # Calcul des \delta S_NET / \delta Coût du travail
        if (name is None) | (name == ""):
            data["taux_captation_marginal"] = (
                data["salaire_net"].shift(-1) - data["salaire_net"]
            ) / (data["salaire_super_brut"].shift(-1) - data["salaire_super_brut"])
            data["taux_captation_marginal"] = data["taux_captation_marginal"].fillna(
                method="ffill"
            )
        else:
            data[f"taux_captation_marginal_{name}"] = (
                data["salaire_net"].shift(-1) - data["salaire_net"]
            ) / (
                data[f"salaire_super_brut_{name}"].shift(-1)
                - data[f"salaire_super_brut_{name}"]
            )
            data[f"taux_captation_marginal_{name}"] = data[
                f"taux_captation_marginal_{name}"
            ].fillna(method="ffill")

        return data

    # Fonction auxiliaire de preprocessing du salaire de base
    def _preprocess_salaire_de_base(
        self, data: pd.DataFrame, year: int, name: str
    ) -> pd.DataFrame:
        """
        Preprocesses the gross salary.

        Expresses the gross salary as a proportion of the SMIC and drops the unnecessary columns.

        Args:
            data (pd.DataFrame):
                The input data.
            year (int):
                The year for which the data is being processed.
            name (str):
                The name of the scenario.

        Returns:
            (pd.DataFrame): The preprocessed data.
        """
        # Expression du salaire en proportion du SMIC
        data["salaire_de_base_prop_smic"] = data["salaire_de_base"] / self.value_smic(
            year=year
        )
        # Liste des variables à conserver
        list_var_keep = (
            ["salaire_de_base", "salaire_de_base_prop_smic", "taux_captation_marginal"]
            if ((name is None) | (name == ""))
            else [
                "salaire_de_base",
                "salaire_de_base_prop_smic",
                f"taux_captation_marginal_{name}",
            ]
        )
        # Suppression des variables inutiles
        data.drop(
            np.setdiff1d(data.columns.tolist(), list_var_keep), axis=1, inplace=True
        )

        return data

    # Fonction auxiliaire de construction des colonnes des DADS
    def columns_dads(self, year: int) -> List[str]:
        """
        Returns the columns to keep from the DADS data.

        The list is read from the ``COLONNES_CAPTATION_MARGINALE`` entry of the
        ``DADS`` section of the module parameters; ``year`` is not used by this
        implementation.

        Args:
            year (int):
                The year for which the data is being processed.

        Returns:
            (List[str]): The columns to keep from the DADS data.
        """
        # Columns to retain at import time, as configured in the parameters
        dads_parameters = params["DADS"]
        return dads_parameters["COLONNES_CAPTATION_MARGINALE"]

    # Fonction auxiliaire de preprocessing des DADS en vue d'une branchement avec openfisca
    def preprocess_dads_simulation(self, year: int) -> None:
        """
        Preprocesses the DADS data for simulation.

        Works in place on ``self.data_dads``: builds the contract end date,
        normalizes the contract start date, renames the DADS columns to the
        variable names used downstream, adds a zero apprentice remuneration,
        expresses the gross salary as a multiple of the SMIC and drops the
        columns that are no longer needed.

        Args:
            year (int):
                The year for which the data is being processed.
        """
        # Contract end date: "datfin" is added, as a number of days, to January 1st
        # of the year, and the result is formatted as a "YYYY-MM-DD" string
        # NOTE(review): presumably "datfin" is the day of the year on which the job spell ends - confirm
        self.data_dads["date_fin_contrat"] = (
            pd.to_datetime(f"{year}-01-01", format="%Y-%m-%d")
            + pd.to_timedelta(arg=self.data_dads["datfin"], unit="D")
        ).dt.strftime("%Y-%m-%d")
        # Rows whose "datfin" is at least 360 receive the far-future end date "2099-12-31"
        # NOTE(review): looks like such contracts are treated as still running at year end - confirm
        self.data_dads["contrat_de_travail_fin"] = np.where(
            self.data_dads["datfin"] < 360,
            self.data_dads["date_fin_contrat"],
            "2099-12-31",
        )
        # Contract start date converted to a "YYYY-MM-DD" string; missing dates become "1970-01-01"
        self.data_dads["date_debut_contrat"] = (
            pd.to_datetime(self.data_dads["date_debut_contrat"], format="%Y-%m-%d")
            .dt.strftime("%Y-%m-%d")
            .fillna("1970-01-01")
        )
        # Renaming of the DADS columns
        self.data_dads.rename(
            {
                "date_debut_contrat": "contrat_de_travail_debut",
                "nbheur": "heures_remunerees_volume",
                "brut_s": "salaire_de_base",
            },
            axis=1,
            inplace=True,
        )
        # Apprentice remuneration is set to zero for every row
        self.data_dads["remuneration_apprenti"] = 0
        # Gross salary expressed as a multiple of the SMIC
        self.data_dads["salaire_de_base_prop_smic"] = self.data_dads[
            "salaire_de_base"
        ] / self.value_smic(year=year)

        # Drop the imported columns plus the intermediate "date_fin_contrat",
        # except the identifiers and "eqtp", which are kept. The three names renamed
        # above are also excluded from the drop list because they no longer exist
        # under their original names.
        self.data_dads.drop(
            np.setdiff1d(
                self.columns_dads(year=year) + ["date_fin_contrat"],
                ["ident_s", "siren", "eqtp", "brut_s", "date_debut_contrat", "nbheur"],
            ).tolist(),
            axis=1,
            inplace=True,
        )

        # Logging
        self.logger.info(
            "Successfully preprocessed data_dads to connect it with openfisca"
        )

    # Fonction auxiliaire de simulation du taux marginal
    def simulate(
        self, year: int, simulation_step_smic: float, simulation_max_smic: float
    ) -> pd.DataFrame:
        """
        Simulates the marginal capture rate under the current legislation.

        Steps: (re)build the base case, simulate the configured variables with an
        unreformed tax-benefit system, postprocess them, derive the baseline
        marginal capture rate, and reduce the frame to the gross salary columns
        and that rate.

        Args:
            year (int):
                The year for which the simulation is being performed.
            simulation_step_smic (float):
                The step size for the simulation, as a multiple of the SMIC value.
            simulation_max_smic (float):
                The maximum value for the simulation, as a multiple of the SMIC value.

        Returns:
            (pd.DataFrame): The simulated data.
        """
        # The base case is always rebuilt here
        self.init_base_case(
            year=year,
            simulation_step_smic=simulation_step_smic,
            simulation_max_smic=simulation_max_smic,
        )

        # Simulation of the configured variables, followed by their postprocessing
        simulated = preprocess_simulated_variables(
            data=self.base_case_simulation(
                tax_benefit_system=FranceTaxBenefitSystem(),
                year=year,
                list_var_simul=params["VARIABLES_CAPTATION_MARGINALE"],
            )
        )

        # Baseline marginal capture rate (no scenario name)
        simulated = self._calculate_taux_captation_marginal(data=simulated, name=None)

        # Only the gross salary columns and the rate are returned
        return self._preprocess_salaire_de_base(data=simulated, year=year, name=None)

    # Fonction auxiliaire de simulation d'une réforme
    def simulate_reform(
        self,
        name: str,
        reform_params: dict,
        year: int,
        simulation_step_smic: float,
        simulation_max_smic: float,
    ) -> pd.DataFrame:
        """
        Simulates a reform.

        Builds the base case if the instance does not have one yet, applies the
        reform to a fresh tax-benefit system, simulates the configured variables
        plus the reform's new relief variable, rebuilds the labour cost under the
        reform, and derives the scenario's marginal capture rate.

        Args:
            name (str):
                The name of the reform; used as suffix of the output columns.
            reform_params (dict):
                The parameters of the reform. Its "TYPE" entry selects the
                simulated ``new_allegement_{TYPE}`` variable.
            year (int):
                The year for which the simulation is being performed.
            simulation_step_smic (float):
                The step size for the simulation, as a multiple of the SMIC value.
            simulation_max_smic (float):
                The maximum value for the simulation, as a multiple of the SMIC value.

        Returns:
            (pd.DataFrame): The simulated data.
        """
        # An existing base case is reused as is
        if not hasattr(self, "base_case"):
            self.init_base_case(
                year=year,
                simulation_step_smic=simulation_step_smic,
                simulation_max_smic=simulation_max_smic,
            )

        # Reform applied on top of a fresh tax-benefit system
        reformed_system = create_and_apply_structural_reform_ag(
            tax_benefit_system=FranceTaxBenefitSystem(), dict_params=reform_params
        )
        self.logger.info("Successfully updated the tax-benefit system")

        # Variable holding the relief introduced by the reform
        new_relief = f"new_allegement_{reform_params['TYPE']}"

        # Simulation and postprocessing of the variables
        simulated = preprocess_simulated_variables(
            data=self.base_case_simulation(
                tax_benefit_system=reformed_system,
                year=year,
                list_var_simul=params["VARIABLES_CAPTATION_MARGINALE"] + [new_relief],
            )
        )

        # Labour cost under the reform: the simulated labour cost and the three
        # current relief variables are summed, then the new relief is subtracted
        current_components = [
            "salaire_super_brut",
            "allegement_general",
            "allegement_cotisation_maladie",
            "allegement_cotisation_allocations_familiales",
        ]
        simulated[f"salaire_super_brut_{name}"] = (
            simulated[current_components].sum(axis=1) - simulated[new_relief]
        )

        # Marginal capture rate of the scenario
        simulated = self._calculate_taux_captation_marginal(data=simulated, name=name)

        # Only the gross salary columns and the scenario rate are returned
        return self._preprocess_salaire_de_base(data=simulated, year=year, name=name)

    # Fonction auxiliaire de simulation de plusieurs réformes théoriques
    def iterate_reform_simulations(
        self,
        scenarios: dict,
        year: int,
        simulation_step_smic: float,
        simulation_max_smic: float,
    ) -> pd.DataFrame:
        """
        Simulates multiple reforms.

        Iterates over the scenarios, simulates each reform under the lower-cased
        scenario name and concatenates the results column-wise. The gross salary
        columns are kept only once (from the first scenario).

        Args:
            scenarios (dict):
                The scenarios to simulate, mapping each scenario name to its
                reform parameters.
            year (int):
                The year for which the simulation is being performed.
            simulation_step_smic (float):
                The step size for the simulation, as a multiple of the SMIC value.
            simulation_max_smic (float):
                The maximum value for the simulation, as a multiple of the SMIC value.

        Returns:
            (pd.DataFrame): The simulated data for all reforms.

        Raises:
            ValueError: If ``scenarios`` is empty.
        """
        # pd.concat would raise the same exception type with a less explicit message
        if not scenarios:
            raise ValueError("At least one scenario is required to simulate reforms")

        list_data_simul = []
        # tqdm wraps the sized dict view (not the enumerate object) so that the
        # progress bar knows the total number of scenarios
        for i, (scenario, reform_params) in enumerate(tqdm(scenarios.items())):
            data_simul = self.simulate_reform(
                name=scenario.lower(),
                reform_params=reform_params,
                year=year,
                simulation_step_smic=simulation_step_smic,
                simulation_max_smic=simulation_max_smic,
            )
            # The salary columns are identical across scenarios: keep the first copy only
            if i > 0:
                data_simul = data_simul.drop(
                    ["salaire_de_base", "salaire_de_base_prop_smic"], axis=1
                )
            list_data_simul.append(data_simul)

        # Column-wise concatenation of all scenarios
        return pd.concat(list_data_simul, axis=1, join="outer")

    # Fonction auxiliaire de construction des poids
    def build_weights_simulation(
        self,
        data_simul: pd.DataFrame,
        year: int,
        simulation_max_smic: float,
        list_var_groupby: Optional[List[str]] = ["salaire_de_base_prop_smic"],
    ) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Builds the weights for the simulation.

        The salaries of ``self.data_dads`` (expressed as multiples of the SMIC) are
        binned on the simulated salary grid, each bin being labelled by its lower
        edge. Rows above ``simulation_max_smic`` are discarded and the weighted sums
        of ``eqtp`` and ``salaire_de_base`` are estimated per group, together with
        the statistical disclosure control output.

        ``self.data_dads`` is modified: its ``salaire_de_base_prop_smic`` column is
        replaced by the bin label and the frame is filtered.

        Args:
            data_simul (pd.DataFrame):
                The simulated data; its ``salaire_de_base_prop_smic`` column
                provides the bin edges.
            year (int):
                The year used to build ``data_dads`` when the instance has none yet.
            simulation_max_smic (float):
                The maximum value for the simulation, as a multiple of the SMIC value.
            list_var_groupby (Optional[List[str]], optional):
                The variables to group by, by default ['salaire_de_base_prop_smic']

        Returns:
            (Tuple[pd.DataFrame, pd.DataFrame]): The descriptive statistics and the secret statistics.
        """
        # Build data_dads if the instance does not hold it yet
        if not hasattr(self, "data_dads"):
            self.build_data_dads(year=year)

        # Salary brackets: the simulated grid provides the edges, each bracket is
        # labelled by its lower edge
        bin_edges = data_simul["salaire_de_base_prop_smic"].tolist()
        self.data_dads["salaire_de_base_prop_smic"] = pd.to_numeric(
            pd.cut(
                x=self.data_dads["salaire_de_base_prop_smic"],
                bins=bin_edges,
                labels=bin_edges[:-1],
                include_lowest=True,
            )
        )
        # Restriction to salaries not exceeding simulation_max_smic SMIC
        self.data_dads = self.data_dads.loc[
            self.data_dads["salaire_de_base_prop_smic"] <= simulation_max_smic
        ]

        # The estimator receives its own copy so that the shared default list of
        # the signature can never be altered from outside this method
        groupby_variables = (
            None if list_var_groupby is None else list(list_var_groupby)
        )

        # Initialisation of the descriptive statistics estimator
        estimator = SecretStatEstimator(
            data_source=self.data_dads,
            list_var_groupby=groupby_variables,
            list_var_of_interest=["eqtp", "salaire_de_base"],
            var_individu="ident_s",
            var_entreprise="siren",
            var_weights="weights",
            threshold_secret_stat_effectif_individu=5,
            threshold_secret_stat_effectif_entreprise=3,
            threshold_secret_stat_contrib_individu=0.8,
            threshold_secret_stat_contrib_entreprise=0.85,
            strategy='total'
        )
        # Descriptive statistics and statistical disclosure control
        data_stat_des, data_secret_stat = estimator.estimate_secret_stat(
            iterable_operations=["sum"],
            include_total=False,
            drop=False,
            fill_value=np.nan,
            nest=False
        )

        return data_stat_des, data_secret_stat

    # Méthode construisant le jeu de données avec les variables simulées
    def build(
        self,
        year_data: int,
        year_simul: int,
        simulation_step_smic: float,
        simulation_max_smic: float,
        scenarios: Optional[Union[dict, None]] = None,
        data: Optional[Union[pd.DataFrame, None]] = None,
    ) -> pd.DataFrame:
        """
        Builds the dataset.

        Loads the data, preprocesses it, adds weights, simulates the variables,
        simulates the reforms, builds the weights, and returns the dataset.

        Args:
            year_data (int):
                The year of the data.
            year_simul (int):
                The year for which the simulation is being performed.
            simulation_step_smic (float):
                The step size for the simulation, as a multiple of the SMIC value.
            simulation_max_smic (float):
                The maximum value for the simulation, as a multiple of the SMIC value.
            scenarios (Optional[Union[dict, None]], optional):
                The scenarios to simulate, by default None
            data (Optional[Union[pd.DataFrame, None]], optional):
                The data, by default None

        Returns:
            (pd.DataFrame): The dataset.
        """
        # Chargement du jeu de données
        self.build_data_dads(data=data, year=year_data)
        # Preprocessing
        self.preprocess_dads_simulation(year=year_data)
        # Ajout des poids
        self.add_weights(year_data=year_data, year_simul=year_simul)
        # Simulation des variables
        data_simul = self.simulate(
            year=year_simul,
            simulation_step_smic=simulation_step_smic,
            simulation_max_smic=simulation_max_smic,
        )
        # Itération des réformes
        if scenarios is not None:
            data_simul = pd.concat(
                [
                    data_simul,
                    self.iterate_reform_simulations(
                        scenarios=scenarios,
                        year=year_simul,
                        simulation_step_smic=simulation_step_smic,
                        simulation_max_smic=simulation_max_smic,
                    ).drop(["salaire_de_base", "salaire_de_base_prop_smic"], axis=1),
                ],
                axis=1,
                join="outer",
            )
        # Construction des poids
        data_stat_des, data_secret_stat = self.build_weights_simulation(
            data_simul=data_simul,
            year=year_simul,
            simulation_max_smic=simulation_max_smic,
        )
        # Concaténation avec les EQTP et la masse salariale par tranche
        data_simul = pd.concat(
            [
                data_simul.set_index("salaire_de_base_prop_smic"),
                data_stat_des.drop,
            ],
            axis=1,
            join="inner",
        ).reset_index()

        # Logging
        self.logger.info("Successfully build simulated DataFrame")

        return data_simul

    # Fonction auxiliaire calculant un taux syntéhtique
    def build_taux_synthetique(
        self,
        data: pd.DataFrame,
        elasticite: int,
        names: List[str],
        weights: Optional[List[str]] = ["eqtp_sum", "salaire_de_base_sum"],
    ) -> pd.DataFrame:
        """
        Calculates a synthetic rate.

        For each scenario and each weighting variable, computes the weighted
        average of ``elasticite * (rate_scenario / rate_baseline - 1)``, where the
        rates are the marginal capture rates found in ``data``.

        Args:
            data (pd.DataFrame):
                The input data. Must contain the weighting columns,
                ``taux_captation_marginal`` and one
                ``taux_captation_marginal_{name.lower()}`` column per scenario.
            elasticite (int):
                The elasticity.
            names (List[str]):
                The names of the scenarios.
            weights (Optional[List[str]], optional):
                The weights, by default ['eqtp_sum', 'salaire_de_base_sum']

        Returns:
            (pd.DataFrame): The synthetic rate, indexed by weighting variable with
                one float column per scenario.

        Raises:
            ValueError: If the input data does not contain the necessary columns.
        """
        # Rate column associated with each scenario
        scenario_columns = {
            name: f"taux_captation_marginal_{name.lower()}" for name in names
        }

        # Check that the required columns are present in the dataset
        missing_columns = np.setdiff1d(
            list(weights)
            + ["taux_captation_marginal"]
            + list(scenario_columns.values()),
            data.columns.tolist(),
        ).tolist()
        if missing_columns:
            message = f"Given DataFrame should contain {missing_columns} as columns"
            self.logger.error(message)
            raise ValueError(message)

        # The result frame is initialised with floats: filling an integer frame with
        # float values is an incompatible-dtype assignment, deprecated in recent pandas
        data_res = pd.DataFrame(data=0.0, index=weights, columns=names)

        for name, rate_column in scenario_columns.items():
            # Relative change of the capture rate; it does not depend on the weight
            relative_change = data[rate_column] / data["taux_captation_marginal"] - 1
            for weight in weights:
                data_res.loc[weight, name] = (
                    elasticite
                    * relative_change.multiply(other=data[weight]).sum()
                    / data[weight].sum()
                )

        return data_res

zonage_zrd property

zonage_zrd: List[str]

Imports the defense restructuring zones (ZRD) zoning.

Returns:

Type Description
List[str]

The list of defense restructuring zones.

zonage_zrr property

zonage_zrr: List[str]

Imports the rural revitalization zones (ZRR) zoning.

Returns:

Type Description
List[str]

The list of rural revitalization zones.

add_weights

add_weights(year_data: int, year_simul: int) -> None

Adds weights to the DADS data.

Parameters:

Name Type Description Default
year_data int

The year of the data.

required
year_simul int

The year of the simulation.

required

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def add_weights(self, year_data: int, year_simul: int) -> None:
    """
    Adds weights to the DADS data.

    The prorated SMIC is simulated on ``self.data_dads``. When the data year and
    the simulation year differ, the remunerations are first rescaled so that
    they keep the same ratio to the prorated SMIC in the simulation year. The
    weights are then added from a temporary gross salary column.

    Args:
        year_data (int): The year of the data.
        year_simul (int): The year of the simulation.

    Returns:
        None
    """
    remuneration_columns = ["salaire_de_base", "remuneration_apprenti"]

    # Prorated SMIC of the data year
    self.data_dads = self.simulate_smic_proratise(
        data=self.data_dads, year=year_data, list_var_exclude=[], inplace=True
    )

    # Different years: bring the salaries to the SMIC level of the simulation year
    if year_data != year_simul:
        smic_data_year = f"smic_proratise_{year_data}"
        # The column simulated for the data year is set aside under a dated name
        self.data_dads.rename(
            columns={"smic_proratise": smic_data_year}, inplace=True
        )
        # Prorated SMIC of the simulation year
        self.data_dads = self.simulate_smic_proratise(
            data=self.data_dads, year=year_simul, list_var_exclude=[], inplace=True
        )
        # Total remuneration as a multiple of the data-year prorated SMIC
        self.data_dads["salaire_brut_smic"] = (
            self.data_dads[remuneration_columns].sum(axis=1)
            / self.data_dads[smic_data_year]
        )
        # Same multiple applied to the simulation-year prorated SMIC
        rescaled = (
            self.data_dads["salaire_brut_smic"] * self.data_dads["smic_proratise"]
        )
        # Only strictly positive remunerations are updated, the others are set to 0
        for column in remuneration_columns:
            self.data_dads[column] = np.where(self.data_dads[column] > 0, rescaled, 0)
        # The data-year prorated SMIC is no longer needed
        self.data_dads.drop(smic_data_year, axis=1, inplace=True)

    # Temporary gross salary used to compute the weights
    self.data_dads["brut_s"] = self.data_dads[remuneration_columns].sum(axis=1)
    self.data_dads = add_weights_eqtp_accos(
        data_dads=self.data_dads,
        year=year_simul,
        var_eqtp="eqtp",
        var_sal_brut="brut_s",
        var_smic_proratise="smic_proratise",
    )
    self.data_dads.drop("brut_s", axis=1, inplace=True)

    # Logging
    self.logger.info("Successfully added accoss weights to data_dads")

base_case_simulation

base_case_simulation(tax_benefit_system: TaxBenefitSystem, year: int, list_var_simul: List[str]) -> DataFrame

Performs a simulation on the base case.

Parameters:

Name Type Description Default
tax_benefit_system TaxBenefitSystem

The tax-benefit system to use for the simulation.

required
year int

The year for which the simulation is performed.

required
list_var_simul List[str]

A list of variables to simulate.

required

Returns:

Type Description
DataFrame

A dataframe containing the results of the simulation.

Source code in bozio_wasmer_simulations/simulation/theoretical/base.py
def base_case_simulation(
    self, tax_benefit_system: TaxBenefitSystem, year: int, list_var_simul: List[str]
) -> pd.DataFrame:
    """
    Performs a simulation on the base case.

    A simulation is built from ``self.base_case`` with the given tax-benefit
    system, then each requested variable is computed with ``calculate_add`` for
    the given year and logged.

    Args:
        tax_benefit_system (TaxBenefitSystem):
            The tax-benefit system to use for the simulation.
        year (int):
            The year for which the simulation is performed.
        list_var_simul (List[str]):
            A list of variables to simulate.

    Returns:
        (pd.DataFrame): A dataframe with one column per simulated variable.
    """
    # Simulation built from the entities of the base case
    simulation = SimulationBuilder().build_from_entities(
        tax_benefit_system, self.base_case
    )

    # One entry per simulated variable, computed and logged in the requested order
    results = {}
    for var_name in list_var_simul:
        results[var_name] = simulation.calculate_add(var_name, year)
        self.logger.info(f"Successfully simulated {var_name} for period {year}")

    return pd.DataFrame(results)

build

build(year_data: int, year_simul: int, simulation_step_smic: float, simulation_max_smic: float, scenarios: Optional[Union[dict, None]] = None, data: Optional[Union[DataFrame, None]] = None) -> DataFrame

Builds the dataset.

Loads the data, preprocesses it, adds weights, simulates the variables, simulates the reforms, builds the weights, and returns the dataset.

Parameters:

Name Type Description Default
year_data int

The year of the data.

required
year_simul int

The year for which the simulation is being performed.

required
simulation_step_smic float

The step size for the simulation, as a multiple of the SMIC value.

required
simulation_max_smic float

The maximum value for the simulation, as a multiple of the SMIC value.

required
scenarios Optional[Union[dict, None]]

The scenarios to simulate, by default None

None
data Optional[Union[DataFrame, None]]

The data, by default None

None

Returns:

Type Description
DataFrame

The dataset.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def build(
    self,
    year_data: int,
    year_simul: int,
    simulation_step_smic: float,
    simulation_max_smic: float,
    scenarios: Optional[Union[dict, None]] = None,
    data: Optional[Union[pd.DataFrame, None]] = None,
) -> pd.DataFrame:
    """
    Builds the dataset.

    Loads the DADS data, preprocesses it, adds weights, runs the baseline
    simulation, optionally runs the reform simulations, computes the
    per-bracket statistics and joins them to the simulated data.

    Args:
        year_data (int):
            The year of the data.
        year_simul (int):
            The year for which the simulation is being performed.
        simulation_step_smic (float):
            The step size for the simulation, as a multiple of the SMIC value.
        simulation_max_smic (float):
            The maximum value for the simulation, as a multiple of the SMIC value.
        scenarios (Optional[Union[dict, None]], optional):
            The scenarios to simulate, by default None. ``None`` and an empty
            dict both mean "no reform to simulate".
        data (Optional[Union[pd.DataFrame, None]], optional):
            The data, by default None

    Returns:
        (pd.DataFrame): The dataset.
    """
    # Load the dataset
    self.build_data_dads(data=data, year=year_data)
    # Preprocessing
    self.preprocess_dads_simulation(year=year_data)
    # Add the weights
    self.add_weights(year_data=year_data, year_simul=year_simul)
    # Baseline simulation
    data_simul = self.simulate(
        year=year_simul,
        simulation_step_smic=simulation_step_smic,
        simulation_max_smic=simulation_max_smic,
    )
    # Reform simulations. An empty mapping is skipped as well: there is
    # nothing to concatenate in that case.
    if scenarios:
        data_reforms = self.iterate_reform_simulations(
            scenarios=scenarios,
            year=year_simul,
            simulation_step_smic=simulation_step_smic,
            simulation_max_smic=simulation_max_smic,
        )
        data_simul = pd.concat(
            [
                data_simul,
                # The salary columns are already provided by the baseline frame
                data_reforms.drop(
                    ["salaire_de_base", "salaire_de_base_prop_smic"], axis=1
                ),
            ],
            axis=1,
            join="outer",
        )
    # Per-bracket statistics (the second element of the tuple is not used here)
    data_stat_des, _data_secret_stat = self.build_weights_simulation(
        data_simul=data_simul,
        year=year_simul,
        simulation_max_smic=simulation_max_smic,
    )
    # Join the simulated data with the per-bracket statistics on the salary
    # bracket. The DataFrame itself must be passed to pd.concat, not its
    # bound ``drop`` method.
    data_simul = pd.concat(
        [
            data_simul.set_index("salaire_de_base_prop_smic"),
            data_stat_des,
        ],
        axis=1,
        join="inner",
    ).reset_index()

    # Logging
    self.logger.info("Successfully build simulated DataFrame")

    return data_simul

build_data_dads

build_data_dads(year: int, data: Optional[Union[DataFrame, None]] = None) -> None

Builds the DADS data.

Parameters:

Name Type Description Default
year int

The year.

required
data Optional[Union[DataFrame, None]]

The data. Defaults to None.

None

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def build_data_dads(
    self, year: int, data: Optional[Union[pd.DataFrame, None]] = None
) -> None:
    """
    Builds the DADS data.

    When no DataFrame is supplied, the result of ``_init_data_dads`` is stored
    in ``self.data_dads``. Otherwise the supplied DataFrame is handed to
    ``_build_data_dads_from_dataframe``.

    Args:
        year (int): The year.
        data (Optional[Union[pd.DataFrame, None]], optional): The data. Defaults to None.

    Returns:
        None
    """
    # No frame supplied: initialise the data from the configured source
    if data is None:
        self.data_dads = self._init_data_dads(year=year)
        return

    # A frame was supplied: delegate to the DataFrame-based builder
    self._build_data_dads_from_dataframe(data=data, year=year)

build_taux_synthetique

build_taux_synthetique(data: DataFrame, elasticite: int, names: List[str], weights: Optional[List[str]] = ['eqtp_sum', 'salaire_de_base_sum']) -> DataFrame

Calculates a synthetic rate.

Parameters:

Name Type Description Default
data DataFrame

The input data.

required
elasticite int

The elasticity.

required
names List[str]

The names of the scenarios.

required
weights Optional[List[str]]

The weights, by default ['eqtp_sum', 'salaire_de_base_sum']

['eqtp_sum', 'salaire_de_base_sum']

Returns:

Type Description
DataFrame

The synthetic rate.

Raises:

Type Description
ValueError

If the input data does not contain the necessary columns.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def build_taux_synthetique(
    self,
    data: pd.DataFrame,
    elasticite: int,
    names: List[str],
    weights: Optional[List[str]] = None,
) -> pd.DataFrame:
    """
    Calculates a synthetic rate.

    For each weight column and each scenario, computes the weighted average of
    ``taux_captation_marginal_<name> / taux_captation_marginal - 1``,
    multiplied by the elasticity.

    Args:
        data (pd.DataFrame):
            The input data.
        elasticite (int):
            The elasticity.
        names (List[str]):
            The names of the scenarios. Each name is lower-cased to find its
            ``taux_captation_marginal_<name>`` column.
        weights (Optional[List[str]], optional):
            The weight columns, by default ['eqtp_sum', 'salaire_de_base_sum']

    Returns:
        (pd.DataFrame): The synthetic rate, indexed by weight with one column
        per scenario name.

    Raises:
        ValueError: If the input data does not contain the necessary columns.
    """
    # Resolve the default here rather than in the signature (no shared mutable default)
    weights = ["eqtp_sum", "salaire_de_base_sum"] if weights is None else list(weights)

    # Check that every required column is present in the dataset
    required_columns = (
        weights
        + ["taux_captation_marginal"]
        + [f"taux_captation_marginal_{name.lower()}" for name in names]
    )
    missing_columns = np.setdiff1d(required_columns, data.columns.tolist()).tolist()
    if missing_columns:
        message = f"Given DataFrame should contain {missing_columns} as columns"
        # Logging
        self.logger.error(message)
        # Error
        raise ValueError(message)

    # The result frame is created as float so that the float results assigned
    # below match the column dtype (an integer frame would need an upcast).
    data_res = pd.DataFrame(data=0.0, index=weights, columns=names)
    # Fill the result frame
    for name in names:
        # Relative gap between the scenario rate and the baseline rate
        relative_gap = (
            data[f"taux_captation_marginal_{name.lower()}"]
            / data["taux_captation_marginal"]
            - 1
        )
        for weight in weights:
            data_res.loc[weight, name] = (
                elasticite
                * relative_gap.multiply(other=data[weight]).sum()
                / data[weight].sum()
            )

    return data_res

build_weights_simulation

build_weights_simulation(data_simul: DataFrame, year: int, simulation_max_smic: float, list_var_groupby: Optional[List[str]] = ['salaire_de_base_prop_smic']) -> Tuple[DataFrame, DataFrame]

Builds the weights for the simulation.

Parameters:

Name Type Description Default
data_simul DataFrame

The simulated data.

required
year int

The year for which the data is being processed.

required
simulation_max_smic float

The maximum value for the simulation, as a multiple of the SMIC value.

required
list_var_groupby Optional[List[str]]

The variables to group by, by default ['salaire_de_base_prop_smic']

['salaire_de_base_prop_smic']

Returns:

Type Description
Tuple[DataFrame, DataFrame]

The descriptive statistics and the secret statistics.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def build_weights_simulation(
    self,
    data_simul: pd.DataFrame,
    year: int,
    simulation_max_smic: float,
    list_var_groupby: Optional[List[str]] = None,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Builds the weights for the simulation.

    Bins the DADS salaries (expressed as a multiple of the SMIC) on the grid
    of simulated salaries, keeps the rows at or below ``simulation_max_smic``
    and sums ``eqtp`` and ``salaire_de_base`` per group through
    ``SecretStatEstimator``.

    Note that ``self.data_dads`` is modified: its ``salaire_de_base_prop_smic``
    column is replaced by the bin label and the frame is filtered.

    Args:
        data_simul (pd.DataFrame):
            The simulated data.
        year (int):
            The year for which the data is being processed.
        simulation_max_smic (float):
            The maximum value for the simulation, as a multiple of the SMIC value.
        list_var_groupby (Optional[List[str]], optional):
            The variables to group by, by default ['salaire_de_base_prop_smic']

    Returns:
        (Tuple[pd.DataFrame, pd.DataFrame]): The descriptive statistics and the secret statistics.
    """
    # Build a fresh default list on each call: the list is handed to the
    # estimator, so it must not be a default shared between calls.
    if list_var_groupby is None:
        list_var_groupby = ["salaire_de_base_prop_smic"]

    # Build data_dads if it has not been built yet
    # NOTE(review): this fallback only loads the data; the columns used below
    # ('salaire_de_base_prop_smic', 'weights') appear to be created by the
    # preprocessing/weighting steps - confirm the fallback path is usable.
    if not hasattr(self, "data_dads"):
        self.build_data_dads(year=year)

    # Salary brackets: the simulated grid gives the bin edges, and each bin is
    # labelled with its lower edge
    bin_edges = data_simul["salaire_de_base_prop_smic"].tolist()
    self.data_dads["salaire_de_base_prop_smic"] = pd.to_numeric(
        pd.cut(
            x=self.data_dads["salaire_de_base_prop_smic"],
            bins=bin_edges,
            labels=bin_edges[:-1],
            include_lowest=True,
        )
    )
    # Keep only the salaries at or below the simulation maximum
    self.data_dads = self.data_dads.loc[
        self.data_dads["salaire_de_base_prop_smic"] <= simulation_max_smic
    ]

    # Initialise the descriptive statistics estimator
    estimator = SecretStatEstimator(
        data_source=self.data_dads,
        list_var_groupby=list_var_groupby,
        list_var_of_interest=["eqtp", "salaire_de_base"],
        var_individu="ident_s",
        var_entreprise="siren",
        var_weights="weights",
        threshold_secret_stat_effectif_individu=5,
        threshold_secret_stat_effectif_entreprise=3,
        threshold_secret_stat_contrib_individu=0.8,
        threshold_secret_stat_contrib_entreprise=0.85,
        strategy="total",
    )
    # Compute the descriptive statistics and the statistical secrecy check
    data_stat_des, data_secret_stat = estimator.estimate_secret_stat(
        iterable_operations=["sum"],
        include_total=False,
        drop=False,
        fill_value=np.nan,
        nest=False,
    )

    return data_stat_des, data_secret_stat

columns_dads

columns_dads(year: int) -> List[str]

Returns the columns to keep from the DADS data.

Parameters:

Name Type Description Default
year int

The year for which the data is being processed.

required

Returns:

Type Description
List[str]

The columns to keep from the DADS data.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def columns_dads(self, year: int) -> List[str]:
    """
    Returns the columns to keep from the DADS data.

    The list is read from the ``COLONNES_CAPTATION_MARGINALE`` entry of the
    ``DADS`` section of the parameters; ``year`` is not used by the lookup.

    Args:
        year (int):
            The year for which the data is being processed.

    Returns:
        (List[str]): The columns to keep from the DADS data.
    """
    # Variables to keep at import time
    dads_settings = params["DADS"]
    return dads_settings["COLONNES_CAPTATION_MARGINALE"]

init_base_case

init_base_case(year: int, simulation_step_smic: float, simulation_max_smic: float) -> None

Initializes a base case for simulation.

Parameters:

Name Type Description Default
year int

The year for which the simulation is performed.

required
simulation_step_smic float

The step size for the simulation, as a multiple of the SMIC value.

required
simulation_max_smic float

The maximum value for the simulation, as a multiple of the SMIC value.

required
Source code in bozio_wasmer_simulations/simulation/theoretical/base.py
def init_base_case(
    self, year: int, simulation_step_smic: float, simulation_max_smic: float
) -> None:
    """
    Initializes a base case for simulation.

    Stores in ``self.base_case`` a single-individual test case whose
    ``salaire_de_base`` is swept along an axis from one SMIC up to
    ``simulation_max_smic`` SMIC.

    Args:
        year (int):
            The year for which the simulation is performed.
        simulation_step_smic (float):
            The step size for the simulation, as a multiple of the SMIC value.
        simulation_max_smic (float):
            The maximum value for the simulation, as a multiple of the SMIC value.
    """
    # SMIC value for the year (no tax-benefit system is needed here: the
    # value comes from self.value_smic)
    value_smic = self.value_smic(year=year)
    # Maximum salary of the simulation and step, both in currency units
    simulation_max = simulation_max_smic * value_smic
    simulation_step = simulation_step_smic * value_smic
    # Number of points between the minimum (1 SMIC) and the maximum for the given step
    # NOTE(review): floating-point noise in the division can, in principle,
    # push ceil() one unit up - confirm whether rounding is wanted here.
    simulation_count = ceil((simulation_max - value_smic) / simulation_step) + 1
    # Characteristics of the individual
    self.base_case = {
        "individus": {
            "individu_1": {
                "effectif_entreprise": {year: 200},
                "depcom_entreprise": {year: "93001"},
                "contrat_de_travail_debut": {year: "2009-03-16"},
                "heures_remunerees_volume": {year: 1820},
                "prime_exceptionnelle_pouvoir_achat": {year: 0},
                "quotite_de_travail": {year: 12},
                "prime_partage_valeur_exoneree": {year: 0},
                "prime_partage_valeur_non_exoneree": {year: 0},
                "age": {year: 40},
                "secteur_activite_employeur": {
                    year: "non_agricole"
                },  # i.e. TypesSecteurActivite.non_agricole
                "exoneration_cotisations_employeur_tode_eligibilite": {year: False},
                "choix_exoneration_cotisations_employeur_agricole": {year: False},
                "travailleur_occasionnel_agricole": {year: False},
                "zone_restructuration_defense": {year: False},
                "zone_revitalisation_rurale": {year: False},
                "categorie_salarie": {
                    year: "prive_non_cadre"
                },  # i.e. TypesCategorieSalarie.prive_non_cadre
                "contrat_de_travail": {
                    year: "temps_plein"
                },  # i.e. TypesContratDeTravail.temps_plein
                "contrat_de_travail_fin": {year: "2099-12-31"},
                "contrat_de_travail_type": {
                    year: "cdi"
                },  # i.e. TypesContrat.cdi
                "salarie_regime_alsace_moselle": {year: False},
                # 'salaire_de_base' is not set here: it is the variable swept by the axis below
                "remuneration_apprenti": {year: 0},
                "apprentissage_contrat_debut": {year: "1970-01-01"},
                "apprenti": {year: False},
                "stage_duree_heures": {year: 0},
                "stage_gratification": {year: 0},
                "taux_versement_transport": {year: 0.032},
                "taux_accident_travail": {year: 0.0212},
            }
        },
        "menages": {
            "menage_1": {
                "personne_de_reference": ["individu_1"],
                "depcom": {year: "93001"},
            },
        },
        "familles": {"famille_1": {"parents": ["individu_1"]}},
        "foyers_fiscaux": {"foyer_fiscal_1": {"declarants": ["individu_1"]}},
        "axes": [
            [
                {
                    "count": simulation_count,
                    "name": "salaire_de_base",
                    "min": value_smic,
                    "max": simulation_max,
                    "period": year,
                }
            ]
        ],
    }

    # Logging
    self.logger.info("Successfully initialized a test case")

iterate_reform_simulations

iterate_reform_simulations(scenarios: dict, year: int, simulation_step_smic: float, simulation_max_smic: float) -> DataFrame

Simulates multiple reforms.

Iterates over the scenarios and simulates each reform. Concatenates the simulated data for all reforms.

Parameters:

Name Type Description Default
scenarios dict

The scenarios to simulate.

required
year int

The year for which the simulation is being performed.

required
simulation_step_smic float

The step size for the simulation, as a multiple of the SMIC value.

required
simulation_max_smic float

The maximum value for the simulation, as a multiple of the SMIC value.

required

Returns:

Type Description
DataFrame

The simulated data for all reforms.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def iterate_reform_simulations(
    self,
    scenarios: dict,
    year: int,
    simulation_step_smic: float,
    simulation_max_smic: float,
) -> pd.DataFrame:
    """
    Simulates multiple reforms.

    Iterates over the scenarios and simulates each reform, then concatenates
    the simulated data column-wise. The salary columns are kept only for the
    first scenario.

    Args:
        scenarios (dict):
            The scenarios to simulate, mapping each scenario name to its
            reform parameters.
        year (int):
            The year for which the simulation is being performed.
        simulation_step_smic (float):
            The step size for the simulation, as a multiple of the SMIC value.
        simulation_max_smic (float):
            The maximum value for the simulation, as a multiple of the SMIC value.

    Returns:
        (pd.DataFrame): The simulated data for all reforms.
    """
    # Columns shared by every scenario; only the first frame keeps them
    shared_columns = ["salaire_de_base", "salaire_de_base_prop_smic"]
    # Result list
    list_data_simul = []
    # tqdm wraps the sized iterable (not the enumerate generator) so that the
    # progress bar knows the number of scenarios
    for i, (scenario, reform_params) in enumerate(tqdm(scenarios.items())):
        # Simulate the reform
        data_simul = self.simulate_reform(
            name=scenario.lower(),
            reform_params=reform_params,
            year=year,
            simulation_step_smic=simulation_step_smic,
            simulation_max_smic=simulation_max_smic,
        )
        # Avoid duplicating the salary columns after the first scenario
        if i > 0:
            data_simul = data_simul.drop(shared_columns, axis=1)
        list_data_simul.append(data_simul)

    # Concatenation
    return pd.concat(list_data_simul, axis=1, join="outer")

iterate_simulation

iterate_simulation(data: DataFrame, tax_benefit_system: TaxBenefitSystem, year: int, list_var_simul: List[str], list_var_exclude: Optional[List[str]] = [], inplace: Optional[bool] = True) -> DataFrame

Iterates a simulation.

Parameters:

Name Type Description Default
data DataFrame

The data to simulate.

required
tax_benefit_system TaxBenefitSystem

The tax benefit system.

required
year int

The year of the simulation.

required
list_var_simul List[str]

The list of variables to simulate.

required
list_var_exclude Optional[List[str]]

The list of variables to exclude. Defaults to [].

[]
inplace Optional[bool]

Whether to perform the simulation in place. Defaults to True.

True

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def iterate_simulation(
    self,
    data: pd.DataFrame,
    tax_benefit_system: TaxBenefitSystem,
    year: int,
    list_var_simul: List[str],
    list_var_exclude: Optional[List[str]] = None,
    inplace: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Iterates a simulation.

    Feeds every column of ``data`` (except the excluded ones) to a default
    simulation as input for ``year``, then computes each variable of
    ``list_var_simul`` and stores it as a column of the result.

    Args:
        data (pd.DataFrame): The data to simulate.
        tax_benefit_system (TaxBenefitSystem): The tax benefit system.
        year (int): The year of the simulation.
        list_var_simul (List[str]): The list of variables to simulate.
        list_var_exclude (Optional[List[str]], optional): The list of variables to exclude. Defaults to None, meaning no exclusion.
        inplace (Optional[bool], optional): Whether to add the simulated columns to ``data`` itself rather than to a copy. Defaults to True.

    Returns:
        (pd.DataFrame): The simulated data.
    """
    # Resolve the default here rather than in the signature (no shared mutable default)
    if list_var_exclude is None:
        list_var_exclude = []

    # Work on the given frame or on an independent copy
    data_res = data if inplace else data.copy()

    # Initialise the simulation with one entity per row
    simulation = SimulationBuilder().build_default_simulation(
        tax_benefit_system, len(data_res)
    )
    # Feed all the data as inputs
    # /!\ 'smic_proratise' was once added to the variables excluded from the inputs, to work
    # around the faulty transition between monthly and yearly values.
    # This was eventually dropped because the remunerations are still divided by 12 and are not
    # intersected with the contract duration.
    # This is probably something to improve in the package.
    for caracteristic in np.setdiff1d(data_res.columns, list_var_exclude):
        # Best effort: a column that cannot be set as an input is logged and skipped
        try:
            simulation.set_input(
                caracteristic, year, data_res[caracteristic].to_numpy()
            )
            # Logging
            self.logger.info(
                f"Successfully initialized {caracteristic} in the french tax benefit system"
            )
        except Exception as e:
            # Logging
            self.logger.warning(
                f"Cannot initialize {caracteristic} in the french tax benefit system : {e}"
            )
    # Compute the requested variables
    for var_simul in tqdm(list_var_simul):
        data_res[var_simul] = simulation.calculate_add(var_simul, year)
        # Logging
        self.logger.info(f"Successfully simulated {var_simul} for period {year}")

    return data_res

plot

plot(data: DataFrame, x: str, hue: Union[str, List[str]], x_label: Optional[Union[str, None]] = None, y_label: Optional[Union[str, None]] = None, hue_label: Optional[Union[str, None]] = None, labels: Optional[Dict[str, str]] = {}, export_key: Optional[Union[PathLike, None]] = None, show: Optional[bool] = True) -> None

Plots the results of the simulation.

Parameters:

Name Type Description Default
data DataFrame

The data to plot.

required
x str

The variable to use for the x-axis.

required
hue Union[str, List[str]]

The variable(s) to use for the hue.

required
x_label Optional[Union[str, None]]

The label for the x-axis. Defaults to None.

None
y_label Optional[Union[str, None]]

The label for the y-axis. Defaults to None.

None
hue_label Optional[Union[str, None]]

The label for the hue. Defaults to None.

None
labels Optional[Dict[str, str]]

A dictionary of labels to apply to the data. Defaults to {}.

{}
export_key Optional[Union[PathLike, None]]

The path to save the plot to. Defaults to None.

None
show Optional[bool]

Whether to display the plot. Defaults to True.

True

Returns:

Type Description
None

None

Source code in bozio_wasmer_simulations/simulation/theoretical/base.py
def plot(
    self,
    data: pd.DataFrame,
    x: str,
    hue: Union[str, List[str]],
    x_label: Optional[Union[str, None]] = None,
    y_label: Optional[Union[str, None]] = None,
    hue_label: Optional[Union[str, None]] = None,
    labels: Optional[Dict[str, str]] = None,
    export_key: Optional[Union[os.PathLike, None]] = None,
    show: Optional[bool] = True,
) -> None:
    """
    Plots the results of the simulation.

    Reshapes ``data`` to long format (one row per ``x`` value and ``hue``
    variable) and draws one line per variable.

    Args:
        data (pd.DataFrame):
            The data to plot.
        x (str):
            The variable to use for the x-axis.
        hue (Union[str, List[str]]):
            The variable(s) to use for the hue.
        x_label (Optional[Union[str, None]], optional):
            The label for the x-axis. Defaults to None, in which case ``x`` is used.
        y_label (Optional[Union[str, None]], optional):
            The label for the y-axis. Defaults to None, in which case "Valeur" is used.
        hue_label (Optional[Union[str, None]], optional):
            The label for the hue. Defaults to None, in which case "Variable" is used.
        labels (Optional[Dict[str, str]], optional):
            A dictionary of labels to apply to the variable names. Defaults to
            None, meaning no relabelling.
        export_key (Optional[Union[os.PathLike, None]], optional):
            The path to save the plot to. Defaults to None.
        show (Optional[bool], optional):
            Whether to display the plot. Defaults to True.

    Returns:
        None
    """
    # Resolve the default here rather than in the signature (no shared mutable default)
    if labels is None:
        labels = {}
    # Normalise the hue argument to a list
    if isinstance(hue, str):
        hue = [hue]

    # Column names of the long-format frame, derived from the labels
    id_name = x_label if (x_label is not None) else x
    var_name = hue_label if (hue_label is not None) else "Variable"
    value_name = y_label if (y_label is not None) else "Valeur"

    # Reshape the dataset to long format
    data_graph = pd.melt(
        frame=data,
        id_vars=x,
        value_vars=hue,
        var_name=var_name,
        value_name=value_name,
    ).rename({x: id_name}, axis=1)
    # Apply the labels; variables without a label keep their name
    data_graph[var_name] = (
        data_graph[var_name].map(labels).fillna(data_graph[var_name])
    )

    # Create the figure
    fig, ax = plt.subplots()
    # Draw on the axes that were just created
    sns.lineplot(data=data_graph, x=id_name, y=value_name, hue=var_name, ax=ax)
    # Format the y-axis as percentages when every plotted variable is a proportion
    if all("_prop_" in var_hue for var_hue in hue):
        ax.yaxis.set_major_formatter(PercentFormatter(xmax=1))
    # Export
    if export_key is not None:
        fig.savefig(export_key, bbox_inches="tight")

    # Logging
    self.logger.info("Successfully build graph")

    if show:
        plt.show()
    else:
        plt.close("all")

preprocess_dads_simulation

preprocess_dads_simulation(year: int) -> None

Preprocesses the DADS data for simulation.

Parameters:

Name Type Description Default
year int

The year for which the data is being processed.

required
Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def preprocess_dads_simulation(self, year: int) -> None:
    """
    Preprocesses the DADS data for simulation.

    Works in place on ``self.data_dads``: derives the contract end date from
    ``datfin``, normalises the contract start date, renames the columns that
    are used as simulation inputs, adds ``remuneration_apprenti`` and
    ``salaire_de_base_prop_smic``, then drops the columns that are no longer
    needed.

    Args:
        year (int):
            The year for which the data is being processed.
    """
    frame = self.data_dads

    # Contract end date: first day of the year plus 'datfin' days, as a string
    first_day_of_year = pd.to_datetime(f"{year}-01-01", format="%Y-%m-%d")
    days_until_end = pd.to_timedelta(arg=frame["datfin"], unit="D")
    frame["date_fin_contrat"] = (first_day_of_year + days_until_end).dt.strftime(
        "%Y-%m-%d"
    )
    # From 360 days onwards a far-future end date is used instead
    frame["contrat_de_travail_fin"] = np.where(
        frame["datfin"] < 360,
        frame["date_fin_contrat"],
        "2099-12-31",
    )

    # Contract start date as a string, with a fixed fallback for missing values
    start_dates = pd.to_datetime(frame["date_debut_contrat"], format="%Y-%m-%d")
    frame["date_debut_contrat"] = start_dates.dt.strftime("%Y-%m-%d").fillna(
        "1970-01-01"
    )

    # Rename the columns used as simulation inputs
    frame.rename(
        columns={
            "date_debut_contrat": "contrat_de_travail_debut",
            "nbheur": "heures_remunerees_volume",
            "brut_s": "salaire_de_base",
        },
        inplace=True,
    )
    # Apprentice remuneration
    frame["remuneration_apprenti"] = 0
    # Base salary expressed as a multiple of the SMIC
    frame["salaire_de_base_prop_smic"] = frame["salaire_de_base"] / self.value_smic(
        year=year
    )

    # Drop the imported columns that are neither kept as identifiers nor
    # renamed above, together with the intermediate end-date column
    imported_columns = self.columns_dads(year=year) + ["date_fin_contrat"]
    kept_or_renamed = [
        "ident_s",
        "siren",
        "eqtp",
        "brut_s",
        "date_debut_contrat",
        "nbheur",
    ]
    frame.drop(
        columns=np.setdiff1d(imported_columns, kept_or_renamed).tolist(),
        inplace=True,
    )

    # Logging
    self.logger.info(
        "Successfully preprocessed data_dads to connect it with openfisca"
    )

simulate

simulate(year: int, simulation_step_smic: float, simulation_max_smic: float) -> DataFrame

Simulates the marginal capture rate.

Initializes the simulation case, initializes the tax-benefit system, simulates the variables, postprocesses the simulated variables, calculates the marginal capture rate, and preprocesses the gross salary.

Parameters:

Name Type Description Default
year int

The year for which the simulation is being performed.

required
simulation_step_smic float

The step size for the simulation, as a multiple of the SMIC value.

required
simulation_max_smic float

The maximum value for the simulation, as a multiple of the SMIC value.

required

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def simulate(
    self, year: int, simulation_step_smic: float, simulation_max_smic: float
) -> pd.DataFrame:
    """
    Simulates the marginal capture rate.

    Builds the base case, runs it through an unreformed French tax-benefit
    system for the configured variables, post-processes the simulated
    variables, computes the marginal capture rate and finally post-processes
    the base salary.

    Args:
        year (int):
            The year for which the simulation is being performed.
        simulation_step_smic (float):
            The step size for the simulation, as a multiple of the SMIC value.
        simulation_max_smic (float):
            The maximum value for the simulation, as a multiple of the SMIC value.

    Returns:
        (pd.DataFrame): The simulated data.
    """
    # Build the base case
    self.init_base_case(
        year=year,
        simulation_step_smic=simulation_step_smic,
        simulation_max_smic=simulation_max_smic,
    )
    # Run the base case on a fresh tax-benefit system for the configured variables
    simulated = self.base_case_simulation(
        tax_benefit_system=FranceTaxBenefitSystem(),
        year=year,
        list_var_simul=params["VARIABLES_CAPTATION_MARGINALE"],
    )
    # Post-process the simulated variables
    simulated = preprocess_simulated_variables(data=simulated)
    # Marginal capture rate (no scenario name for the baseline)
    simulated = self._calculate_taux_captation_marginal(data=simulated, name=None)
    # Post-process the base salary
    return self._preprocess_salaire_de_base(data=simulated, year=year, name=None)

simulate_reform

simulate_reform(name: str, reform_params: dict, year: int, simulation_step_smic: float, simulation_max_smic: float) -> DataFrame

Simulates a reform.

Initializes the simulation case, initializes the tax-benefit system, applies the reform, simulates the variables, postprocesses the simulated variables, calculates the marginal capture rate, and preprocesses the gross salary.

Parameters:

Name Type Description Default
name str

The name of the reform.

required
reform_params dict

The parameters of the reform.

required
year int

The year for which the simulation is being performed.

required
simulation_step_smic float

The step size for the simulation, as a multiple of the SMIC value.

required
simulation_max_smic float

The maximum value for the simulation, as a multiple of the SMIC value.

required

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/theoretical/taux_captation_marginal.py
def simulate_reform(
    self,
    name: str,
    reform_params: dict,
    year: int,
    simulation_step_smic: float,
    simulation_max_smic: float,
) -> pd.DataFrame:
    """
    Run the marginal capture rate simulation under a reform.

    Steps, in order: build the base case when the instance has no
    ``base_case`` attribute, apply the reform to a fresh
    ``FranceTaxBenefitSystem``, simulate the variables listed in
    ``params["VARIABLES_CAPTATION_MARGINALE"]`` plus
    ``new_allegement_<TYPE>``, post-process the simulated variables, add the
    ``salaire_super_brut_<name>`` column, then compute the marginal capture
    rate and post-process the base salary for that reform.

    Args:
        name (str):
            The name of the reform; used as the suffix of the reformed
            super-gross salary column.
        reform_params (dict):
            The parameters of the reform. Must contain a ``"TYPE"`` key.
        year (int):
            The year for which the simulation is being performed.
        simulation_step_smic (float):
            The step size for the simulation, as a multiple of the SMIC value.
        simulation_max_smic (float):
            The maximum value for the simulation, as a multiple of the SMIC value.

    Returns:
        (pd.DataFrame): The simulated data.
    """
    # Build the base case only when the instance does not carry one yet.
    # NOTE(review): an existing base case is reused as-is, whatever year/step/max
    # it was built with -- confirm callers rely on this.
    if not hasattr(self, "base_case"):
        self.init_base_case(
            year=year,
            simulation_step_smic=simulation_step_smic,
            simulation_max_smic=simulation_max_smic,
        )

    # Apply the reform on top of a fresh French tax-benefit system
    reformed_system = create_and_apply_structural_reform_ag(
        tax_benefit_system=FranceTaxBenefitSystem(),
        dict_params=reform_params,
    )
    self.logger.info("Successfully updated the tax-benefit system")

    # The reform type names the additional variable to simulate
    reform_kind = reform_params["TYPE"]
    new_allegement_col = f"new_allegement_{reform_kind}"
    variables_to_simulate = params["VARIABLES_CAPTATION_MARGINALE"] + [
        new_allegement_col
    ]

    # Simulation on the base case, then post-processing of the simulated variables
    simulated = self.base_case_simulation(
        tax_benefit_system=reformed_system,
        year=year,
        list_var_simul=variables_to_simulate,
    )
    simulated = preprocess_simulated_variables(data=simulated)

    # Reformed super-gross salary: the current super-gross salary and the three
    # existing reductions are summed, then the reform's reduction is subtracted.
    summed_columns = [
        "salaire_super_brut",
        "allegement_general",
        "allegement_cotisation_maladie",
        "allegement_cotisation_allocations_familiales",
    ]
    total_before_new_allegement = simulated[summed_columns].sum(axis=1)
    simulated[f"salaire_super_brut_{name}"] = (
        total_before_new_allegement - simulated[new_allegement_col]
    )

    # Marginal capture rate, then post-processing of the base salary
    simulated = self._calculate_taux_captation_marginal(data=simulated, name=name)
    return self._preprocess_salaire_de_base(data=simulated, year=year, name=name)

simulate_smic_proratise

simulate_smic_proratise(data: DataFrame, year: int, list_var_exclude: Optional[List[str]] = [], inplace: Optional[bool] = True) -> DataFrame

Simulates the prorated minimum wage.

Parameters:

Name Type Description Default
data DataFrame

The data to simulate.

required
year int

The year of the simulation.

required
list_var_exclude Optional[List[str]]

The list of variables to exclude. Defaults to [].

[]
inplace Optional[bool]

Whether to perform the simulation in place. Defaults to True.

True

Returns:

Type Description
DataFrame

The simulated data.

Source code in bozio_wasmer_simulations/simulation/empirical/base.py
def simulate_smic_proratise(
    self,
    data: pd.DataFrame,
    year: int,
    list_var_exclude: Optional[List[str]] = None,
    inplace: Optional[bool] = True,
) -> pd.DataFrame:
    """
    Simulates the prorated minimum wage.

    Runs ``self.iterate_simulation`` on ``data`` with a fresh
    ``FranceTaxBenefitSystem`` for the single variable ``smic_proratise``.

    Args:
        data (pd.DataFrame): The data to simulate.
        year (int): The year of the simulation.
        list_var_exclude (Optional[List[str]], optional): The list of variables to exclude.
            Defaults to None, which is treated as an empty list.
        inplace (Optional[bool], optional): Whether to perform the simulation in place. Defaults to True.

    Returns:
        (pd.DataFrame): The simulated data, as returned by ``iterate_simulation``.
    """
    # A fresh list per call: a literal ``[]`` default would be one object shared
    # by every call and by whatever ``iterate_simulation`` does with it.
    if list_var_exclude is None:
        list_var_exclude = []

    # Initialise the parameters of the French tax-benefit system
    tax_benefit_system = FranceTaxBenefitSystem()

    # Simulate the prorated SMIC for the year of the data
    data = self.iterate_simulation(
        data=data,
        tax_benefit_system=tax_benefit_system,
        year=year,
        list_var_simul=["smic_proratise"],
        list_var_exclude=list_var_exclude,
        inplace=inplace,
    )

    return data

value_smic

value_smic(year: int) -> float

Calculates the value of the SMIC for the given year, as the sum of the twelve monthly `smic_b_mensuel` parameter values of that year.

Parameters:

Name Type Description Default
year int

The year for which the SMIC value is calculated.

required

Returns:

Type Description
float

The value of the SMIC for the given year.

Source code in bozio_wasmer_simulations/simulation/theoretical/base.py
def value_smic(self, year: int) -> float:
    """
    Return the SMIC total for the given year.

    The total is the sum, over the twelve months ``<year>-01`` to
    ``<year>-12``, of the parameter
    ``marche_travail.salaire_minimum.smic.smic_b_mensuel`` read from a fresh
    ``FranceTaxBenefitSystem``. The result is logged at info level.

    Args:
        year (int):
            The year for which the SMIC value is calculated.

    Returns:
        (float): The value of the SMIC for the given year.
    """
    # The tax-benefit system holds the SMIC values as parameters
    parameters_at = FranceTaxBenefitSystem().get_parameters_at_instant

    # Zero-padded month labels "01" .. "12"
    month_labels = (f"{number:02d}" for number in range(1, 13))

    annual_smic = sum(
        parameters_at(
            instant=f"{year}-{month}"
        ).marche_travail.salaire_minimum.smic.smic_b_mensuel
        for month in month_labels
    )

    # Logging
    self.logger.info(f"The SMIC value computed for {year} is {annual_smic} €")

    return annual_smic