{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Toy data generation" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "This notebook generates the toy data used in the following examples. It's not necessary to run this notebook as the toy data generation will be executed in the background if needed. This notebook illustrates the properties of the toy dataset." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from pylorentz import Momentum4\n",
"\n",
"from freeforestml import Variable, Process, Cut, hist, McStack\n",
"from freeforestml.toydata import generate, proposal, mcmc_step, vbfh_pdf, mcmc, ztt_pdf"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# shuffle=False allows plotting the MC walk\n",
"%time df = generate(10000, vbfh_frac=0.5, shuffle=False)"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Markov chain walk" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Boolean mask of the rows drawn in the VBFH panel (fpid == 1); the complement\n",
"# is drawn in the Z->tautau panel. Later cells reuse `idx`.\n",
"idx = df.fpid == 1\n",
"fig, ax = plt.subplots(2, 1, sharex=True)\n",
"ax[0].set_title(\"VBFH\")\n",
"# x axis: running position of each row within its own sample.\n",
"# Series.sum() counts the selected rows without a Python-level loop.\n",
"ax[0].plot(np.arange(idx.sum()), df.jet_1_pt[idx], label=\"Jet 1\")\n",
"ax[0].plot(np.arange(idx.sum()), df.jet_2_pt[idx], label=\"Jet 2\")\n",
"ax[0].set_ylabel(r\"$p_{\\mathrm{T}}$\")\n",
"ax[0].legend()\n",
" \n",
"ax[1].set_title(r\"$Z\\rightarrow\\tau\\tau$\")\n",
"ax[1].plot(np.arange((~idx).sum()), df.jet_1_pt[~idx], label=\"Jet 1\")\n",
"ax[1].plot(np.arange((~idx).sum()), df.jet_2_pt[~idx], label=\"Jet 2\")\n",
"ax[1].set_ylabel(r\"$p_{\\mathrm{T}}$\")\n",
"ax[1].legend()\n",
"fig.tight_layout()\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"fig, ax = plt.subplots(2, 1, sharex=True)\n",
"ax[0].set_title(\"VBFH\")\n",
"ax[0].plot(np.arange(idx.sum()), df.jet_1_eta[idx].abs(), label=\"Jet 1\")\n",
"ax[0].plot(np.arange(idx.sum()), df.jet_2_eta[idx].abs(), label=\"Jet 2\")\n",
"ax[0].set_ylabel(r\"$|\\eta_j|$\")\n",
"ax[0].legend()\n",
" \n",
"ax[1].set_title(r\"$Z\\rightarrow\\tau\\tau$\")\n",
"ax[1].plot(np.arange((~idx).sum()), df.jet_1_eta[~idx].abs(), label=\"Jet 1\")\n",
"ax[1].plot(np.arange((~idx).sum()), df.jet_2_eta[~idx].abs(), label=\"Jet 2\")\n",
"ax[1].set_ylabel(r\"$|\\eta_j|$\")\n",
"ax[1].legend()\n",
"fig.tight_layout()\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Reconstructed Higgs mass along the walk, one curve per sample.\n",
"fig, ax = plt.subplots(1, 1)\n",
"ax.plot(np.arange(idx.sum()), df.higgs_m[idx], label=\"VBFH\")\n",
"ax.plot(np.arange((~idx).sum()), df.higgs_m[~idx], label=r\"$Z\\rightarrow\\tau\\tau$\")\n",
"ax.set_ylabel(r\"$m^H$\")\n",
"ax.legend()\n",
"fig.tight_layout()\n",
"None"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Toy data distributions" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# One process per sample, stacked together in every histogram below.\n",
"p_vbfh = Process(\"VBFH\", range=(1, 1))\n",
"p_ztt = Process(r\"$Z\\rightarrow\\tau\\tau$\", range=(0, 0))\n",
"s_all = McStack(p_vbfh, p_ztt)"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"jet_1_eta = Variable(r\"$\\eta^{j_1}$\", \"jet_1_eta\")"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"hist(df, jet_1_eta, bins=20, range=(-6, 6), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"jet_2_eta = Variable(r\"$\\eta^{j_2}$\", \"jet_2_eta\")\n",
"hist(df, jet_2_eta, bins=20, range=(-6, 6), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Absolute pseudorapidity separation of the two jets. It must use both jets;\n",
"# taking only jet_1_eta.abs() would plot |eta| of the leading jet instead.\n",
"delta_eta_jets = Variable(r\"$|\\Delta\\eta{jj}|$\", lambda d: (d.jet_1_eta - d.jet_2_eta).abs())\n",
"hist(df, delta_eta_jets, bins=22, range=(-1, 10), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"jet_1_pt = Variable(r\"$p_{\\mathrm{T}}^{j_1}$\", \"jet_1_pt\")\n",
"jet_2_pt = Variable(r\"$p_{\\mathrm{T}}^{j_2}$\", \"jet_2_pt\")\n",
"hist(df, jet_1_pt, bins=50, range=(0, 1000), stacks=[s_all], weight=\"weight\")\n",
"hist(df, jet_2_pt, bins=25, range=(0, 250), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_higgs_m = Variable(r\"$m^H$\", \"higgs_m\")\n",
"hist(df, v_higgs_m, bins=20, range=(0, 200), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_weight = Variable(r\"$w$\", \"weight\")\n",
"# Unit weight per event (weight * 0 + 1): the histogram then shows how many\n",
"# events carry each weight value, rather than weighting the weights by themselves.\n",
"hist(df, v_weight, bins=20, range=(0, 2), stacks=[s_all],\n",
"     weight=lambda d: d.weight * 0 + 1)\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_mjj = Variable(r\"$m^{jj}$\", \"m_jj\")\n",
"# Drawn once; an identical second call would only produce a duplicate figure.\n",
"hist(df, v_mjj, bins=51, range=(-40, 2000), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_lep_pt = Variable(r\"$p^{\\ell}_{\\mathrm{T}}$\", \"lep_pt\")\n",
"hist(df, v_lep_pt, bins=30, range=(-10, 290), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_lep_eta = Variable(r\"$|\\eta^{\\ell}|$\", \"lep_eta\")\n",
"hist(df, v_lep_eta, bins=30, range=(-5, 5), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_tau_pt = Variable(r\"$p^{\\tau}_{\\mathrm{T}}$\", \"tau_pt\")\n",
"hist(df, v_tau_pt, bins=30, range=(-10, 290), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_tau_eta = Variable(r\"$|\\eta^{\\tau}|$\", \"tau_eta\")\n",
"hist(df, v_tau_eta, bins=30, range=(-5, 5), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_met_pt = Variable(r\"$E^{\\mathrm{miss}}_{\\mathrm{T}}$\", \"met_pt\")\n",
"hist(df, v_met_pt, bins=30, range=(-10, 290), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# Lepton centrality: read the lepton column, not the tau one, so this plot\n",
"# differs from the tau centrality plot in the next cell.\n",
"# NOTE(review): assumes generate() provides a lep_centrality column - confirm.\n",
"v_lep_cent = Variable(r\"$\\eta^{\\ell}$\", \"lep_centrality\")\n",
"hist(df, v_lep_cent, bins=22, range=(-0.05, 1.05), stacks=[s_all], weight=\"weight\")\n",
"None"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"v_tau_cent = Variable(r\"$\\eta^{\\tau}$\", \"tau_centrality\")\n",
"hist(df, v_tau_cent, bins=22, range=(-0.05, 1.05), stacks=[s_all], weight=\"weight\")\n",
"None"
] }
],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" } },
"nbformat": 4,
"nbformat_minor": 2 }