4747 ]
4848).SerializeToString ()
4949
50- GEMMA2_HASH = "61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 "
50+ GEMMA3_HASH = "1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c "
5151
5252
5353class TestGetTokenizerName (unittest .TestCase ):
@@ -58,6 +58,18 @@ def test_get_tokenizer_name_success(self):
5858 loader .get_tokenizer_name ("gemini-2.5-pro-preview-06-05" ), "gemma3"
5959 )
6060
def test_get_tokenizer_name_huggingface(self):
  """Checks that each listed Gemini model name resolves to "gemma4".

  Every model name runs inside its own subTest, so a wrong mapping for one
  model does not prevent the remaining models from being checked, and the
  failure report names the model that broke.
  """
  model_names = (
      "gemini-3.5-flash",
      "gemini-3.1-flash-lite",
      "gemini-3.1-pro-preview",
      "gemini-4-flash-preview",
  )
  for model_name in model_names:
    with self.subTest(model_name=model_name):
      self.assertEqual(loader.get_tokenizer_name(model_name), "gemma4")
72+
6173 def test_get_tokenizer_name_unsupported (self ):
6274 with self .assertRaisesRegex (
6375 ValueError , "Model unsupported-model is not supported"
@@ -105,9 +117,9 @@ def test_load_model_proto_from_url(
105117 ):
106118 mock_exists .return_value = False # Don't use cache
107119 self ._setup_get_mock (mock_get )
108- mock_sha256 .return_value .hexdigest .return_value = GEMMA2_HASH
120+ mock_sha256 .return_value .hexdigest .return_value = GEMMA3_HASH
109121
110- proto = loader .load_model_proto ("gemma2 " )
122+ proto = loader .load_model_proto ("gemma3 " )
111123
112124 self .assertIsInstance (proto , sentencepiece_model_pb2 .ModelProto )
113125 self .assertEqual (len (proto .pieces ), 4 )
@@ -128,9 +140,9 @@ def test_load_model_proto_from_cache(
128140 ):
129141 mock_exists .return_value = True # Use cache
130142 mock_open_func .return_value .read .return_value = FAKE_MODEL_CONTENT
131- mock_sha256 .return_value .hexdigest .return_value = GEMMA2_HASH
143+ mock_sha256 .return_value .hexdigest .return_value = GEMMA3_HASH
132144
133- proto = loader .load_model_proto ("gemma2 " )
145+ proto = loader .load_model_proto ("gemma3 " )
134146
135147 self .assertIsInstance (proto , sentencepiece_model_pb2 .ModelProto )
136148 mock_get .assert_not_called ()
@@ -154,10 +166,10 @@ def test_load_model_proto_corrupted_cache(
154166 # First hash for corrupted cache, second for good download
155167 mock_sha256 .side_effect = [
156168 MagicMock (hexdigest = MagicMock (return_value = "wrong_hash" )),
157- MagicMock (hexdigest = MagicMock (return_value = GEMMA2_HASH )),
169+ MagicMock (hexdigest = MagicMock (return_value = GEMMA3_HASH )),
158170 ]
159171
160- proto = loader .load_model_proto ("gemma2 " )
172+ proto = loader .load_model_proto ("gemma3 " )
161173
162174 self .assertIsInstance (proto , sentencepiece_model_pb2 .ModelProto )
163175 mock_remove .assert_called_once ()
@@ -180,7 +192,7 @@ def test_load_model_proto_bad_hash_from_url(
180192 with self .assertRaisesRegex (
181193 ValueError , "Downloaded model file is corrupted"
182194 ):
183- loader .load_model_proto ("gemma2 " )
195+ loader .load_model_proto ("gemma3 " )
184196
185197 def test_load_model_proto_unsupported (self , * args ):
186198 with self .assertRaisesRegex (
@@ -200,9 +212,9 @@ def test_get_sentencepiece_success(
200212 ):
201213 mock_exists .return_value = False
202214 self ._setup_get_mock (mock_get )
203- mock_sha256 .return_value .hexdigest .return_value = GEMMA2_HASH
215+ mock_sha256 .return_value .hexdigest .return_value = GEMMA3_HASH
204216
205- processor = loader .get_sentencepiece ("gemma2 " )
217+ processor = loader .get_sentencepiece ("gemma3 " )
206218
207219 self .assertIsInstance (processor , spm .SentencePieceProcessor )
208220 mock_get .assert_called_once ()
@@ -225,11 +237,32 @@ def test_get_sentencepiece_caching(
225237 ):
226238 mock_exists .return_value = False
227239 self ._setup_get_mock (mock_get )
228- mock_sha256 .return_value .hexdigest .return_value = GEMMA2_HASH
240+ mock_sha256 .return_value .hexdigest .return_value = GEMMA3_HASH
229241
230242 # Call twice
231- loader .get_sentencepiece ("gemma2 " )
232- loader .get_sentencepiece ("gemma2 " )
243+ loader .get_sentencepiece ("gemma3 " )
244+ loader .get_sentencepiece ("gemma3 " )
233245
234246 # Should only be loaded once due to lru_cache
235247 mock_get .assert_called_once ()
248+
249+
class TestGetHuggingFaceTokenizer(unittest.TestCase):
  """Tests for `loader.get_huggingface_tokenizer`."""

  @patch("genai._local_tokenizer_loader.AutoProcessor")
  def test_get_huggingface_tokenizer_success(self, mock_auto_processor):
    """Returns the `tokenizer` attribute of the processor built for "gemma4".

    `AutoProcessor` is replaced with a mock, so nothing is fetched; the test
    only checks which identifier is passed to `from_pretrained` and that the
    processor's tokenizer is what the loader hands back.
    """
    expected_tokenizer = MagicMock()
    # Keyword arguments to MagicMock become attributes of the mock, so the
    # fake processor exposes `expected_tokenizer` as its `.tokenizer`.
    mock_auto_processor.from_pretrained.return_value = MagicMock(
        tokenizer=expected_tokenizer
    )

    result = loader.get_huggingface_tokenizer("gemma4")

    self.assertEqual(result, expected_tokenizer)
    mock_auto_processor.from_pretrained.assert_called_once_with(
        "google/gemma-4-E4B-it"
    )

  def test_get_huggingface_tokenizer_unsupported(self):
    """Expects a KeyError when the tokenizer name is not recognized."""
    self.assertRaises(KeyError, loader.get_huggingface_tokenizer, "unsupported")
0 commit comments