From ad6eed8595cf895a00282b6bf4505142424612ca Mon Sep 17 00:00:00 2001
From: Ryan McKenna <mckennar@google.com>
Date: Tue, 23 Jun 2026 16:32:46 -0700
Subject: [PATCH] Deprecate data_generation_v2.generate() in favor of
 TabularSynthesizer.

PiperOrigin-RevId: 936961615
---
 dpsynth/CHANGELOG.md => CHANGELOG.md |   4 +-
 docs/in_memory_api.md                |  64 ++++---
 docs/index.md                        |   2 +-
 docs/sitemap.md                      |   4 +-
 dpsynth/data_generation_v2.py        | 272 ++-------------------------
 tests/data_generation_v2_test.py     | 148 ++-------------
 6 files changed, 73 insertions(+), 421 deletions(-)
 rename dpsynth/CHANGELOG.md => CHANGELOG.md (90%)

diff --git a/dpsynth/CHANGELOG.md b/CHANGELOG.md
similarity index 90%
rename from dpsynth/CHANGELOG.md
rename to CHANGELOG.md
index bd13223..9648570 100644
--- a/dpsynth/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -16,8 +16,8 @@ private synthetic data.
 This first release contains code for generating differentially private synthetic
 tabular data using marginal measurement and Private-PGM inference, including:
 
--   **Two execution modes**: In-memory local mode (via `dpsynth.generate()`,
-    tested up to ~100M rows) and a distributed Apache Beam mode for larger
+-   **Two execution modes**: In-memory local mode (via `dpsynth.TabularSynthesizer`,
+    tested up to ~100M rows) and a distributed Apache Beam mode for larger
     workloads.
 -   **Marginal-based mechanisms**: AIM, MST, Independent, and Direct mechanisms
     for selecting and measuring marginals under differential privacy.
diff --git a/docs/in_memory_api.md b/docs/in_memory_api.md
index a9868a2..9dc4fd4 100644
--- a/docs/in_memory_api.md
+++ b/docs/in_memory_api.md
@@ -11,32 +11,31 @@ within a single machine's RAM.
 
 --------------------------------------------------------------------------------
 
-## Python API: `dpsynth.generate`
+## Python API: `dpsynth.TabularSynthesizer`
 
-The primary entry point for in-memory synthesis is `dpsynth.generate()`. It
-accepts a Pandas DataFrame alongside a dictionary of attribute domains and
-returns a fully synthetic, differentially private DataFrame matching the exact
-schema and data types of your input.
+The primary entry point for in-memory synthesis is
+`dpsynth.TabularSynthesizer`. It accepts a dictionary of attribute domains,
+is calibrated with a privacy budget, and generates a fully synthetic,
+differentially private DataFrame matching the exact schema and data types of
+your input.
 
-### Function Signature
+### Usage
 
 ```python
 import dpsynth
 from dpsynth import discrete_mechanisms
+import numpy as np
 import pandas as pd
 
-synthetic_df = dpsynth.generate(
-    data: pd.DataFrame,
-    domains: dict[str, dpsynth.domain.AttributeType],
-    epsilon: float,
-    delta: float,
-    *,
-    discrete_config: discrete_mechanisms.DiscreteMechanismConfig = discrete_mechanisms.MSTConfig(),
-    numerical_bins: int = 32,
-    one_way_marginal_budget_fraction: float = 0.1,
-    cross_attribute_constraints: list = (),
-    skip_compression: bool = False,
-) -> pd.DataFrame
+synth = dpsynth.TabularSynthesizer(
+    domains=domains,
+    discrete_mechanism=discrete_mechanisms.MSTMechanism(),
+)
+result = synth.calibrate(
+    epsilon=1.0,
+    delta=1e-6,
+)(np.random.default_rng(), sensitive_df)
+synthetic_df = result.synthetic_data
 ```
 
 ### Key Arguments
@@ -70,6 +69,7 @@ synthetic records.
 import dpsynth
 from dpsynth import discrete_mechanisms
 from dpsynth import domain
+import numpy as np
 import pandas as pd
 
 # 1. Load sensitive tabular data into Pandas
@@ -78,23 +78,25 @@ sensitive_df = pd.read_csv("sensitive_transactions.csv")
 # 2. Load domain schema from YAML
 attribute_domains = domain.from_yaml_file("transaction_domain.yaml")
 
-# 3. Configure the synthesis mechanism (AIM)
-aim_config = discrete_mechanisms.AIMConfig(
-    seed=42,
-    rounds=50,
-    pgm_iters=1000,
-)
-
-# 4. Generate Differentially Private synthetic data
-synthetic_df = dpsynth.generate(
-    data=sensitive_df,
+# 3. Configure and calibrate the synthesizer (AIM)
+synth = dpsynth.TabularSynthesizer(
     domains=attribute_domains,
+    discrete_mechanism=discrete_mechanisms.AIMConfig(
+        seed=42,
+        rounds=50,
+        pgm_iters=1000,
+    ),
+)
+calibrated = synth.calibrate(
     epsilon=1.0,
     delta=1e-6,
-    discrete_config=aim_config,
-    numerical_bins=16, # Use 16 quantile buckets for numerical columns
+    numerical_bins=16,  # Use 16 quantile buckets for numerical columns
 )
 
+# 4. Generate Differentially Private synthetic data
+result = calibrated(np.random.default_rng(), sensitive_df)
+synthetic_df = result.synthetic_data
+
 # 5. Save the synthetic dataframe
 synthetic_df.to_csv("synthetic_transactions.csv", index=False)
 print("Synthetic data successfully generated!")
@@ -139,7 +141,7 @@ python3 bin/main.py \
 
 ## Under the Hood: The In-Memory Lifecycle
 
-When you invoke `dpsynth.generate()`, the library performs the following
+When you invoke `TabularSynthesizer`, the library performs the following
 single-machine pipeline:
 
 1.  **Discretization**: Continuous numerical columns are bucketed into
diff --git a/docs/index.md b/docs/index.md
index b19fae7..d336613 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -58,7 +58,7 @@ dataframes to massive distributed datasets across computing clusters:
                └────────────────────────────────────────┘
 ```
 
-### 1. In-Memory DataFrame API (`dpsynth.generate`)
+### 1. In-Memory DataFrame API (`dpsynth.TabularSynthesizer`)
 
 Optimized for rapid prototyping, research experimentation, and datasets that
 easily fit within single-machine memory.
diff --git a/docs/sitemap.md b/docs/sitemap.md
index 5bac456..058eb24 100644
--- a/docs/sitemap.md
+++ b/docs/sitemap.md
@@ -11,7 +11,7 @@
 
 *   [Why DPSynth?](index.md#why-dpsynth)
 *   [Core APIs and Execution Models](index.md#core-apis-and-execution-models)
-    *   [1. In-Memory DataFrame API (`dpsynth.generate`)](index.md#1-in-memory-dataframe-api-dpsynthgenerate)
+    *   [1. In-Memory DataFrame API (`dpsynth.TabularSynthesizer`)](index.md#1-in-memory-dataframe-api-dpsynthtabularsynthesizer)
     *   [2. Scalable PipelineBackend API (`dpsynth.data_generation`)](index.md#2-scalable-pipelinebackend-api-dpsynthdata_generation)
 *   [Documentation Sitemap & Navigation](index.md#documentation-sitemap--navigation)
 *   [Supported Synthesis Algorithms](index.md#supported-synthesis-algorithms)
@@ -46,7 +46,7 @@
 <details>
 <summary>📁 <a href="in_memory_api.md">In-Memory DataFrame API Guide</a></summary>
 
-*   [Python API: `dpsynth.generate`](in_memory_api.md#python-api-dpsynthgenerate)
-    *   [Function Signature](in_memory_api.md#function-signature)
+*   [Python API: `dpsynth.TabularSynthesizer`](in_memory_api.md#python-api-dpsynthtabularsynthesizer)
+    *   [Usage](in_memory_api.md#usage)
     *   [Key Arguments](in_memory_api.md#key-arguments)
 *   [End-to-End Python Example](in_memory_api.md#end-to-end-python-example)
diff --git a/dpsynth/data_generation_v2.py b/dpsynth/data_generation_v2.py
index 06a6f8c..aeb8a6d 100644
--- a/dpsynth/data_generation_v2.py
+++ b/dpsynth/data_generation_v2.py
@@ -12,94 +12,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Implementation of an end-to-end DP synthetic data generation mechanism.
+"""Deprecated shim.
 
-.. deprecated::
-    This module is deprecated. Use
-    :class:`dpsynth.data_generation_v3.TabularSynthesizer`
-    instead.
+Use :class:`dpsynth.data_generation_v3.TabularSynthesizer` instead.
 """
 
 from collections.abc import Mapping, Sequence
-from typing import TypeAlias
 import warnings
 
-from absl import logging
-import dp_accounting
 from dpsynth import constraints
 from dpsynth import discrete_mechanisms
 from dpsynth import domain
-from dpsynth import transformations
-from dpsynth.discrete_mechanisms import accounting
-from dpsynth.discrete_mechanisms import common
-from dpsynth.pipeline_transformations import categorical_values_derivation
-from dpsynth.pipeline_transformations import dp_auto_discretizer
-import mbi
+from dpsynth.data_generation_v3 import TabularSynthesizer
 import numpy as np
 import pandas as pd
-import pipeline_dp
-
-Dataset: TypeAlias = pd.DataFrame
-
-
-def _compress_data(data, one_way_measurements):
-  """Compresses the domain and measurements if necessary."""
-  compressed_domain, compressed_one_way_measurements, compress_transforms = (
-      common.get_domain_compression_transformations(one_way_measurements)
-  )
-
-  total_measurement = common.convert_to_total_measurement(one_way_measurements)
-
-  logging.info(
-      '[SynthKit Tabular]: Estimated Total %d',
-      total_measurement.noisy_measurement,
-  )
-  compressed_data = mbi.Dataset(
-      transformations.apply(data.df, compress_transforms),
-      compressed_domain,
-  )
-  logging.info('[SynthKit Tabular]: Original domain: %s', data.domain)
-  logging.info('[SynthKit Tabular]: Compressed domain: %s', compressed_domain)
-
-  measurements = [total_measurement] + list(compressed_one_way_measurements)
-  return compressed_data, measurements, compress_transforms
-
-
-def _compute_privacy_parameters(
-    epsilon: float,
-    delta: float,
-    one_way_marginal_budget_fraction: float,
-    discrete_config: discrete_mechanisms.DiscreteMechanism,
-) -> tuple[float, float]:
-  """Compute privacy parameters for one-way marginals and discrete mechanism."""
-
-  one_way_marginal_sigma = dp_accounting.get_sigma_gaussian(
-      epsilon=one_way_marginal_budget_fraction * epsilon,
-      delta=one_way_marginal_budget_fraction * delta,
-  )
-  one_way_marginal_gdp_mu = 1.0 / one_way_marginal_sigma**2
-
-  def make_event_from_param(zcdp_rho):
-    event1 = dp_accounting.GaussianDpEvent(one_way_marginal_sigma)
-    event2 = discrete_config.calibrate(zcdp_rho=zcdp_rho).dp_event
-    return dp_accounting.ComposedDpEvent([event1, event2])
-
-  if isinstance(
-      discrete_config.calibrate(zcdp_rho=1.0).dp_event, dp_accounting.ZCDpEvent
-  ):
-    make_fresh_accountant = dp_accounting.rdp.RdpAccountant
-  else:
-    make_fresh_accountant = dp_accounting.pld.PLDAccountant
-
-  discrete_mechanism_zcdp_rho = dp_accounting.calibrate_dp_mechanism(
-      make_event_from_param=make_event_from_param,
-      target_epsilon=0.9 * epsilon,
-      target_delta=0.9 * delta,
-      make_fresh_accountant=make_fresh_accountant,
-      bracket_interval=dp_accounting.LowerEndpointAndGuess(1e-3, 1.0),
-  )
-
-  return one_way_marginal_gdp_mu, discrete_mechanism_zcdp_rho
 
 
 def generate(
@@ -108,191 +34,31 @@ def generate(
     epsilon: float,
     delta: float,
     *,
-    discrete_config: discrete_mechanisms.DiscreteMechanism = discrete_mechanisms.MSTMechanism(),
+    discrete_config: (
+        discrete_mechanisms.DiscreteMechanism
+    ) = discrete_mechanisms.MSTMechanism(),
     numerical_bins: int = 32,
     one_way_marginal_budget_fraction: float = 0.1,
     cross_attribute_constraints: Sequence[constraints.Constraint] = (),
     skip_compression: bool = False,
 ) -> pd.DataFrame:
-  """Generate synthetic data with record-level differential privacy.
-
-  Ths function encodes the input categorical and numerical data into a
-  discrete domain, then runs the specified mechanism on the discretized data.
-  Finally, it converts the synthetic data back to the original domain.
-
-  Args:
-    data: The dataset to generate synthetic data for.
-    domains: A mapping from column names to attribute domains. Every key in this
-      mapping must be a column of `data`.
-    epsilon: Privacy parameter.
-    delta: Privacy parameter.
-    discrete_config: The mechanism configuration for the discretized and
-      integer-encoded data.
-    numerical_bins: The number of bins to use for discretization.
-    one_way_marginal_budget_fraction: The fraction of the total privacy budget
-      to use for one-way marginal queries.
-    cross_attribute_constraints: Constraints to enforce on the generated data.
-    skip_compression: Whether to skip the domain compression step.
-
-  Returns:
-    A synthetic dataset.
-  """
+  """Deprecated. Use :class:`data_generation_v3.TabularSynthesizer` instead."""
   warnings.warn(
       'data_generation_v2.generate() is deprecated. Use'
       ' data_generation_v3.TabularSynthesizer instead.',
       DeprecationWarning,
       stacklevel=2,
   )
-  assert 0 <= one_way_marginal_budget_fraction <= 1
-  if not skip_compression and cross_attribute_constraints:
-    raise ValueError(
-        'Compression is not supported when cross-attribute constraints are'
-        ' provided.'
-    )
-  for col in domains:
-    if col not in data.columns:
-      raise ValueError(
-          f'{col=} not found in the dataset. Available columns: {data.columns}'
-      )
-    if isinstance(domains[col], domain.FreeFormTextAttribute):
-      raise ValueError(
-          f'FreeFormTextAttribute is not supported for column {col!r}.'
-          ' Free-form text attributes cannot be synthesized by this mechanism.'
-      )
-
-  backend = pipeline_dp.LocalBackend()
-
-  # only for initialization (numerical + unknown domain categorical)
-  accountant = pipeline_dp.NaiveBudgetAccountant(0.1 * epsilon, 0.1 * delta)
-  engine = pipeline_dp.DPEngine(accountant, backend)
-  # for remainder of mechanism, not going through pipeline_dp accounting
-
-  one_way_marginal_gdp_mu, discrete_zcdp_rho = _compute_privacy_parameters(
-      0.9 * epsilon,
-      0.9 * delta,
-      one_way_marginal_budget_fraction,
-      discrete_config,
-  )
-
-  ##################################################
-  # Map the data to a standardized discrete domain #
-  ##################################################
-  transform_fns = {}
-  discrete_domains = {}
-
-  numerical_attributes = {
-      col: dom
-      for col, dom in domains.items()
-      if isinstance(dom, domain.NumericalAttribute)
-  }
-  open_set_categorical_attributes = [
-      col
-      for col, dom in domains.items()
-      if isinstance(dom, domain.OpenSetCategoricalAttribute)
-  ]
-  if numerical_attributes:
-    # dp_auto_discretizer does not currently handle empty dict here.
-    output_numerical = (
-        dp_auto_discretizer.create_transformations_via_dp_quantiles(
-            pcol=(dict(s) for _, s, in data.iterrows()),
-            engine=engine,
-            backend=backend,
-            field_name_to_attribute=numerical_attributes,
-            num_quanitle_buckets=numerical_bins,
-        )
-    )
-  else:
-    output_numerical = None
-
-  if open_set_categorical_attributes:
-    output_categorical = (
-        categorical_values_derivation.derive_categorical_values(
-            input_data=(dict(s) for _, s, in data.iterrows()),
-            backend=backend,
-            dp_engine=engine,
-            attribute_keys_to_derive=list(open_set_categorical_attributes),
-        )
-    )
-    logging.info('output_categorical: %s', output_categorical)
-  else:
-    output_categorical = None
-
-  accountant.compute_budgets()
-  if output_numerical is not None:
-    for field_name, cat_attr, to_categorical in output_numerical:
-      logging.info('Discretizing numerical column: %s', field_name)
-      to_standardized = transformations.discrete_encoder(cat_attr)
-      transform_fns[field_name] = to_standardized @ to_categorical
-      discrete_domains[field_name] = cat_attr.size
-
-  if output_categorical is not None:
-    for field_name, cat_attr in list(output_categorical)[0].items():
-      logging.info('Deriving categorical column: %s', field_name)
-      transform_fns[field_name] = transformations.discrete_encoder(cat_attr)
-      discrete_domains[field_name] = cat_attr.size
-
-  categorical_attributes = {
-      col: dom
-      for col, dom in domains.items()
-      if isinstance(dom, domain.CategoricalAttribute)
-  }
-  for col, attr in categorical_attributes.items():
-    logging.info('Encoding categorical column: %s', col)
-    transform_fns[col] = transformations.discrete_encoder(attr)
-    discrete_domains[col] = attr.size
-
-  discrete = {}
-  for col in discrete_domains:
-    logging.info('Encoding categorical column: %s', col)
-    dtype = np.min_scalar_type(discrete_domains[col])
-    values = data[col].map(transform_fns[col].transform).values
-    discrete[col] = values.astype(dtype)
-
-  discrete = mbi.Dataset(discrete, mbi.Domain.fromdict(discrete_domains))
-
-  logging.info('[SynthKit Tabular]: Finished encoding data.')
-
-  #######################################################################
-  # Measure 1-way marginals and compress domain by merging rare values. #
-  #######################################################################
-  one_way_marginal_queries = [(col,) for col in discrete.domain]
-  gdp_sigma = accounting.gdp_gaussian_sigma(one_way_marginal_gdp_mu)
-  rng = np.random.default_rng()
-  one_way_measurements = common.measure_marginals_with_noise(
-      rng, discrete, one_way_marginal_queries, gdp_sigma
-  )
-  logging.info('[SynthKit Tabular]: Measured one-way marginals.')
-
-  if not skip_compression:
-    discrete, one_way_measurements, compress_transforms = _compress_data(
-        discrete, one_way_measurements
-    )
-    for col in compress_transforms:
-      transform_fns[col] = compress_transforms[col] @ transform_fns[col]
-
-  # Run the mechanism on the discretized data.
-  initial_potentials = constraints.get_initial_parameters(
-      cross_attribute_constraints, discrete.domain
+  del skip_compression  # Not supported by TabularSynthesizer.
+  synth = TabularSynthesizer(
+      domains=domains,
+      discrete_mechanism=discrete_config,
+      cross_attribute_constraints=cross_attribute_constraints,
   )
-
-  result = discrete_config.calibrate(zcdp_rho=discrete_zcdp_rho)(
-      rng,
-      data=discrete,
-      initial_measurements=one_way_measurements,
-      initial_potentials=initial_potentials,
-  )
-
-  synthetic_data = result.synthetic_data
-  logging.info('[SynthKit Tabular]: Generated discrete synthetic data.')
-
-  # Convert synthetic data back to the original domain.
-  synthetic_columns = {}
-  for col in transform_fns:
-    synthetic_columns[col] = pd.Series(
-        [transform_fns[col].inverse(x) for x in synthetic_data.df[col]],
-        dtype=data[col].dtype,
-    )
-  logging.info('[SynthKit Tabular]: Converted data back to original domain.')
-
-  column_order = [col for col in data.columns if col in domains]
-  return pd.DataFrame(synthetic_columns)[column_order]
+  result = synth.calibrate(
+      epsilon=epsilon,
+      delta=delta,
+      numerical_bins=numerical_bins,
+      init_budget_fraction=one_way_marginal_budget_fraction,
+  )(np.random.default_rng(), data)
+  return result.synthetic_data
diff --git a/tests/data_generation_v2_test.py b/tests/data_generation_v2_test.py
index 412e65b..d94dc24 100644
--- a/tests/data_generation_v2_test.py
+++ b/tests/data_generation_v2_test.py
@@ -12,149 +12,33 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Smoke test for the deprecated data_generation_v2 shim."""
+
+import warnings
+
 from absl.testing import absltest
-from dpsynth import constraints
 from dpsynth import data_generation_v2
 from dpsynth import domain
 import pandas as pd
 
 
-class MechanismTest(absltest.TestCase):
-
-  def test_end_to_end_categorical(self):
-    attribute_domains = {
-        "A": domain.CategoricalAttribute(
-            possible_values=["a", "b", "c"], out_of_domain_index=0
-        ),
-        "B": domain.CategoricalAttribute(
-            possible_values=["x", "y", "z"], out_of_domain_index=0
-        ),
-        "C": domain.OpenSetCategoricalAttribute(),
-    }
-
-    values = [
-        ["a", "x", "4"],
-        ["b", "y", "4"],
-        ["c", "z", "4"],
-    ]
-
-    df = pd.DataFrame(data=values, columns=["A", "B", "C"])
-    synthetic_df = data_generation_v2.generate(
-        df,
-        attribute_domains,
-        epsilon=100,
-        delta=0.1,
-        skip_compression=True,
-    )
-    self.assertIsInstance(synthetic_df, pd.DataFrame)
-
-  def test_end_to_end_numerical(self):
-    attribute_domains = {
-        "A": domain.NumericalAttribute(min_value=0, max_value=10),
-        "B": domain.NumericalAttribute(min_value=-10, max_value=10),
-    }
-
-    values = [
-        [5, 5],
-        [5, -10],
-        [0, -5],
-    ]
+class DeprecationShimTest(absltest.TestCase):
 
-    df = pd.DataFrame(data=values, columns=["A", "B"], dtype=float)
-    synthetic_df = data_generation_v2.generate(df, attribute_domains, 1000, 0.1)
-    self.assertListEqual(synthetic_df.columns.tolist(), ["A", "B"])
-    for col in attribute_domains:
-      dom = attribute_domains[col]
-      left, right = dom.min_value, dom.max_value
-      self.assertTrue(synthetic_df[col].between(left, right).all())
-
-  def test_end_to_end_categorical_with_constraint(self):
+  def test_generate_emits_deprecation_warning(self):
     attribute_domains = {
-        "A": domain.CategoricalAttribute(
-            possible_values=["a", "b", "c"], out_of_domain_index=0
-        ),
-        "B": domain.CategoricalAttribute(
-            possible_values=["x", "y", "z"], out_of_domain_index=0
+        'A': domain.CategoricalAttribute(
+            possible_values=['a', 'b', 'c'], out_of_domain_index=0
         ),
     }
-
-    constraint = constraints.Constraint(
-        attribute_names=("A", "B"),
-        attribute_domains=(
-            attribute_domains["A"],
-            attribute_domains["B"],
-        ),
-        possible_combinations=[
-            ("a", "x"),
-            ("b", "y"),
-            ("c", "z"),
-        ],
-    )
-
-    values = [
-        ["a", "x"],
-        ["b", "y"],
-        ["c", "z"],
-        ["a", "y"],
-        ["b", "x"],
-        ["c", "x"],
-    ]
-
-    df = pd.DataFrame(data=values, columns=["A", "B"])
-    synthetic_df = data_generation_v2.generate(
-        df,
-        attribute_domains,
-        epsilon=1.0,
-        delta=1e-5,
-        discrete_config=data_generation_v2.discrete_mechanisms.MSTConfig(),
-        cross_attribute_constraints=[constraint],
-        skip_compression=True,
-    )
-
-    def is_valid(row):
-      return (row["A"], row["B"]) in constraint.possible_combinations
-
-    self.assertTrue(synthetic_df.apply(is_valid, axis=1).all())
-
-  def test_end_to_end_mixed_domain(self):
-    attribute_domains = {
-        "A": domain.OpenSetCategoricalAttribute(),
-        "B": domain.NumericalAttribute(min_value=0, max_value=10),
-    }
-
-    values = [
-        ["a", 1],
-        ["b", 5],
-        ["c", 10],
-    ]
-
-    df = pd.DataFrame(data=values, columns=["A", "B"])
-    df["B"] = df["B"].astype(float)
-    synthetic_df = data_generation_v2.generate(
-        df,
-        attribute_domains,
-        epsilon=100,
-        delta=0.1,
-        skip_compression=True,
-    )
-    self.assertIsInstance(synthetic_df, pd.DataFrame)
-    self.assertListEqual(synthetic_df.columns.tolist(), ["A", "B"])
-    dom_b = attribute_domains["B"]
-    self.assertTrue(
-        synthetic_df["B"].between(dom_b.min_value, dom_b.max_value).all()
-    )
-
-  def test_raises_on_freeform_text_attribute(self):
-    attribute_domains = {
-        "A": domain.CategoricalAttribute(possible_values=["a", "b"]),
-        "text": domain.FreeFormTextAttribute(max_tokens=128),
-    }
-    df = pd.DataFrame({"A": ["a", "b"], "text": ["hello", "world"]})
-    with self.assertRaises(ValueError):
-      data_generation_v2.generate(
-          df, attribute_domains, epsilon=1.0, delta=1e-5
+    df = pd.DataFrame({'A': ['a', 'b', 'c']})
+    with warnings.catch_warnings(record=True) as w:
+      warnings.simplefilter('always')
+      synthetic_df = data_generation_v2.generate(
+          df, attribute_domains, epsilon=100, delta=0.1, skip_compression=True
       )
+    self.assertTrue(any(issubclass(x.category, DeprecationWarning) for x in w))
+    self.assertIsInstance(synthetic_df, pd.DataFrame)
 
 
-if __name__ == "__main__":
+if __name__ == '__main__':
   absltest.main()