{ "cells": [ { "cell_type": "markdown", "id": "6105c66c", "metadata": {}, "source": [ "# Safe Support Vector Machine Notebook\n", "## A Quick Start Guide to implementing Safer Support Vector Machines\n", "### Commands commented out for path manipulation are for developers only\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "82797236", "metadata": {}, "outputs": [], "source": [ "import logging\n", "import os\n", "\n", "import numpy as np\n", "from sklearn import datasets\n", "\n", "logging.basicConfig()\n", "logger = logging.getLogger(\"wrapper_svm\")\n", "logger.setLevel(logging.INFO)\n", "\n", "from sacroml.safemodel.classifiers import SafeSVC" ] }, { "cell_type": "markdown", "id": "190329f4", "metadata": {}, "source": [ "## Use the sklearn Wisconsin breast cancer dataset" ] }, { "cell_type": "code", "execution_count": 2, "id": "85af89a0", "metadata": {}, "outputs": [], "source": [ "cancer = datasets.load_breast_cancer()\n", "x = np.asarray(cancer.data, dtype=np.float64)\n", "y = np.asarray(cancer.target, dtype=np.float64)" ] }, { "cell_type": "markdown", "id": "1410e2f4", "metadata": {}, "source": [ "## Kernel for the approximator: equivalent to the RBF kernel"
] }, { "cell_type": "code", "execution_count": 3, "id": "69481c5c", "metadata": {}, "outputs": [], "source": [ "def rbf(x, y, gamma=1):\n", "    \"\"\"Return the RBF kernel value exp(-gamma * sum((x - y) ** 2)).\"\"\"\n", "    return np.exp(-gamma * np.sum((x - y) ** 2))\n", "\n", "\n", "def rbf_svm(x, y, gamma=1):\n", "    \"\"\"Return the matrix of RBF kernel values between rows of x and rows of y.\n", "\n", "    Entry [i, j] is rbf(x[i, :], y[j, :], gamma); the result has shape\n", "    (x.shape[0], y.shape[0]).\n", "    \"\"\"\n", "    gram = np.zeros((x.shape[0], y.shape[0]))\n", "    for i in range(x.shape[0]):\n", "        for j in range(y.shape[0]):\n", "            gram[i, j] = rbf(x[i, :], y[j, :], gamma)\n", "    return gram" ] }, { "cell_type": "markdown", "id": "f7bdc592", "metadata": {}, "source": [ "## Set parameters" ] }, { "cell_type": "code", "execution_count": 4, "id": "7cd6c7f2", "metadata": {}, "outputs": [], "source": [ "gamma = 0.1  # Kernel width\n", "C = 1  # Penalty term\n", "dhat = 5  # Dimension of approximator\n", "eps = 500  # DP level (not very private)" ] }, { "cell_type": "markdown", "id": "cab1eed3", "metadata": {}, "source": [ "## Define Differentially Private version with DP level (approximate)" ] }, { "cell_type": "code", "execution_count": 5, "id": "1a7734dc", "metadata": {}, "outputs": [], "source": [ "clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)\n", "clf3.fit(x, y)\n", "c3 = clf3.predict(x)\n", "p3 = clf3.predict_proba(x)" ] }, { "cell_type": "markdown", "id": "6c08d536", "metadata": {}, "source": [ "## Define the model and fit it.\n", "## Save and Request Release\n", "### We are warned that dhat is too low."
] }, { "cell_type": "code", "execution_count": 6, "id": "c3b7e21f", "metadata": {}, "outputs": [], "source": [ "clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)\n", "clf3.fit(x, y)\n", "clf3.save(name=\"testSaveSVC.pkl\")\n", "clf3.request_release(path=\"testSaveSVC\", ext=\"pkl\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "9aa22802", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dataset_name: ''\n", "dataset_module_path: ''\n", "features: {}\n", "generalisation_error: .nan\n", "safemodel:\n", "- researcher: unknown\n", " model_type: SVC\n", " details: 'WARNING: model parameters may present a disclosure risk:\n", "\n", " - parameter dhat = 5 identified as less than the recommended min value of 1000.'\n", " recommendation: Do not allow release\n", " reason: 'WARNING: model parameters may present a disclosure risk:\n", "\n", " - parameter dhat = 5 identified as less than the recommended min value of 1000.'\n", " timestamp: '2025-12-02 21:28:03'\n", "model_type: SklearnModel\n", "model_name: SafeSVC\n", "model_params: {}\n", "model_path: model.pkl\n", "X_train_path: ''\n", "y_train_path: ''\n", "X_test_path: ''\n", "y_test_path: ''\n", "X_train_orig_path: ''\n", "y_train_orig_path: ''\n", "X_test_orig_path: ''\n", "y_test_orig_path: ''\n", "proba_train_path: ''\n", "proba_test_path: ''\n", "indices_train_path: ''\n", "indices_test_path: ''\n", "\n" ] } ], "source": [ "target_yaml = os.path.normpath(\"testSaveSVC/target.yaml\")\n", "with open(target_yaml) as f:\n", " print(f.read())" ] }, { "cell_type": "markdown", "id": "cf32d434", "metadata": {}, "source": [ "## Set Parameters to safe values" ] }, { "cell_type": "code", "execution_count": 8, "id": "87689404", "metadata": {}, "outputs": [], "source": [ "gamma = 0.1 # Kernel width\n", "C = 1 # Penalty term\n", "dhat = 1000 # Dimension of approximator\n", "eps = 500 # DP level (not very private)" ] }, { "cell_type": "markdown", "id": "75c8400a", "metadata": {}, 
"source": [ "## Define the model and fit it.\n", "## Save and Request Release\n", "### Model parameters are within recommended ranges. The saved model can proceed to the next step of the checking procedure." ] }, { "cell_type": "code", "execution_count": 9, "id": "90369029", "metadata": {}, "outputs": [], "source": [ "clf3 = SafeSVC(eps=eps, dhat=dhat, C=C, gamma=gamma)\n", "clf3.fit(x, y)\n", "clf3.save(name=\"testSaveSVC.pkl\")\n", "clf3.request_release(path=\"testSaveSVC\", ext=\"pkl\")" ] }, { "cell_type": "markdown", "id": "f7ae65a4", "metadata": {}, "source": [ "## Examine the checkfile" ] }, { "cell_type": "code", "execution_count": 10, "id": "ca19340f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dataset_name: ''\n", "dataset_module_path: ''\n", "features: {}\n", "generalisation_error: .nan\n", "safemodel:\n", "- researcher: unknown\n", " model_type: SVC\n", " details: 'Model parameters are within recommended ranges.\n", "\n", " '\n", " recommendation: Proceed to next step of checking\n", " timestamp: '2025-12-02 21:28:04'\n", "model_type: SklearnModel\n", "model_name: SafeSVC\n", "model_params: {}\n", "model_path: model.pkl\n", "X_train_path: ''\n", "y_train_path: ''\n", "X_test_path: ''\n", "y_test_path: ''\n", "X_train_orig_path: ''\n", "y_train_orig_path: ''\n", "X_test_orig_path: ''\n", "y_test_orig_path: ''\n", "proba_train_path: ''\n", "proba_test_path: ''\n", "indices_train_path: ''\n", "indices_test_path: ''\n", "\n" ] } ], "source": [ "target_yaml = os.path.normpath(\"testSaveSVC/target.yaml\")\n", "with open(target_yaml) as f:\n", " print(f.read())" ] }, { "cell_type": "code", "execution_count": null, "id": "bc6c0dfe", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name":
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }