From 6148257b3eebbad3272005f2e49e0e9615fd8f02 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 23 Aug 2024 14:40:01 -0500 Subject: [PATCH] JSON tests for corrected date, timestamp, and mixed types (#11388) Signed-off-by: Robert (Bobby) Evans --- .../src/main/python/json_matrix_test.py | 318 ++++++++++++++++-- .../src/test/resources/escaped_strings.json | 16 + .../src/test/resources/mixed_objects.json | 35 ++ .../resources/nested_escaped_strings.json | 55 +++ .../timestamp_formatted_strings.json | 38 +++ .../timestamp_tz_formatted_strings.json | 12 + .../nvidia/spark/rapids/GpuJsonTuple.scala | 7 +- 7 files changed, 449 insertions(+), 32 deletions(-) create mode 100644 integration_tests/src/test/resources/mixed_objects.json create mode 100644 integration_tests/src/test/resources/nested_escaped_strings.json create mode 100644 integration_tests/src/test/resources/timestamp_formatted_strings.json create mode 100644 integration_tests/src/test/resources/timestamp_tz_formatted_strings.json diff --git a/integration_tests/src/main/python/json_matrix_test.py b/integration_tests/src/main/python/json_matrix_test.py index a47771f47f8..c9dec8afac9 100644 --- a/integration_tests/src/main/python/json_matrix_test.py +++ b/integration_tests/src/main/python/json_matrix_test.py @@ -584,7 +584,11 @@ def test_json_tuple_dec_locale_non_aribic(std_input_path): "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))] + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"] @pytest.mark.parametrize('input_file', COMMON_TEST_FILES) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. They are the same @@ -670,7 +674,11 @@ def test_from_json_longs(std_input_path, input_file): "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same def test_scan_json_decs(std_input_path, read_func, spark_tmp_table_factory, input_file, dt): assert_gpu_and_cpu_are_equal_collect( @@ -695,7 +703,11 @@ def test_scan_json_decs(std_input_path, read_func, spark_tmp_table_factory, inpu "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_decs(std_input_path, input_file, dt): schema = StructType([StructField("data", dt)]) @@ -719,7 +731,11 @@ def test_from_json_decs(std_input_path, input_file, dt): "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(condition=is_spark_400_or_later(), reason='https://github.com/NVIDIA/spark-rapids/issues/11154')), "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @pytest.mark.parametrize('read_func', [read_json_df]) def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -743,7 +759,11 @@ def test_scan_json_strings(std_input_path, read_func, spark_tmp_table_factory, i "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10534')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_strings(std_input_path, input_file): schema = StructType([StructField("data", StringType())]) @@ -765,12 +785,19 @@ def test_from_json_strings(std_input_path, input_file): "int_array_formatted.json", "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", - pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10196')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11386')), + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11387')), + 
pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_formats(std_input_path, input_file): assert_gpu_and_cpu_are_equal_collect( - lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", '''get_json_object(json, "$.data")''')) + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", + '''get_json_object(json, "$.data")''', + '''get_json_object(json, '$.id')''', + '''get_json_object(json, '$.name')''')) @pytest.mark.parametrize('input_file', [ "int_formatted.json", @@ -787,11 +814,28 @@ def test_get_json_object_formats(std_input_path, input_file): "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC) def test_get_json_object_child_formats(std_input_path, input_file): assert_gpu_and_cpu_are_equal_collect( - lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", '''get_json_object(json, "$.data.a")''')) + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", + '''get_json_object(json, "$.data.a")''', + '''get_json_object(json, '$.tags[0]')''', + '''get_json_object(json, '$.details.address.city')''', + '''get_json_object(json, '$.user.profile.username')''', + '''get_json_object(json, '$.user.skills[0]')''', + '''get_json_object(json, '$.user.projects[1].name')''', + '''get_json_object(json, '$.departments[0].employees[1].name')''', + '''get_json_object(json, '$.departments[1].employees[0].id')''', + '''get_json_object(json, '$.data.numeric')''', + '''get_json_object(json, '$.data.details.timestamp')''', + '''get_json_object(json, '$.data.details.list[1]')''', + '''get_json_object(json, '$.company.departments[1].employees[0].name')''', + '''get_json_object(json, '$.company.departments[0].employees[1].role')''')) @pytest.mark.parametrize('input_file', [ "int_formatted.json", @@ -807,12 +851,20 @@ def test_get_json_object_child_formats(std_input_path, input_file): "int_array_formatted.json", "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", - pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10196')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11386')), + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11387')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC) def 
test_json_tuple_formats(std_input_path, input_file): assert_gpu_and_cpu_are_equal_collect( - lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", '''json_tuple(json, "data")'''), + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr("*", + '''json_tuple(json, "data")''').selectExpr("*", + # json_tuple is not the same as get_json_object + '''json_tuple(json, 'id', 'name', 'details.address.city') AS (id, name, city)''').selectExpr("*", + '''json_tuple(json, 'user.profile.username', 'user.skills[0]', 'user.projects[1].name') AS (username, first_skill, second_project_name)'''), conf =_enable_json_tuple_conf) @pytest.mark.parametrize('input_file', COMMON_TEST_FILES) @@ -847,7 +899,11 @@ def test_from_json_bools(std_input_path, input_file): "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @pytest.mark.parametrize('read_func', [read_json_df]) def test_scan_json_floats(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -871,7 +927,11 @@ def test_scan_json_floats(std_input_path, read_func, spark_tmp_table_factory, in "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_floats(std_input_path, input_file): schema = StructType([StructField("data", FloatType())]) @@ -894,7 +954,11 @@ def test_from_json_floats(std_input_path, input_file): "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @pytest.mark.parametrize('read_func', [read_json_df]) def test_scan_json_doubles(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -918,7 +982,11 @@ def test_scan_json_doubles(std_input_path, read_func, spark_tmp_table_factory, i "int_struct_formatted.json", "int_mixed_array_struct_formatted.json", "escaped_strings.json", - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + 
"mixed_objects.json", + "timestamp_formatted_strings.json", + "timestamp_tz_formatted_strings.json"]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_doubles(std_input_path, input_file): schema = StructType([StructField("data", DoubleType())]) @@ -926,6 +994,118 @@ def test_from_json_doubles(std_input_path, input_file): lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), conf =_enable_json_to_structs_conf) +@pytest.mark.parametrize('input_file', [ + "int_formatted.json", + "float_formatted.json", + "sci_formatted.json", + pytest.param("int_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9664')), + "float_formatted_strings.json", + "sci_formatted_strings.json", + pytest.param("decimal_locale_formatted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11390')), + "single_quoted_strings.json", + "boolean_formatted.json", + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json", + pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9664')), + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11391')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11391'))]) +@pytest.mark.parametrize('read_func', [read_json_df]) +@allow_non_gpu(*non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_scan_json_corrected_dates(std_input_path, read_func, spark_tmp_table_factory, input_file): + conf = copy_and_update(_enable_all_types_json_scan_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + assert_gpu_and_cpu_are_equal_collect( + read_func(std_input_path + '/' + input_file, + StructType([StructField("data", DateType())]), + spark_tmp_table_factory), + conf=conf) + +@pytest.mark.parametrize('input_file', [ + "int_formatted.json", + "float_formatted.json", + "sci_formatted.json", + pytest.param("int_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9664')), + "float_formatted_strings.json", + "sci_formatted_strings.json", + pytest.param("decimal_locale_formatted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11390')), + "single_quoted_strings.json", + "boolean_formatted.json", + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json", + pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/9664')), + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), 
reason='https://github.com/NVIDIA/spark-rapids/issues/11391')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11391'))]) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_corrected_dates(std_input_path, input_file): + schema = StructType([StructField("data", DateType())]) + conf = copy_and_update(_enable_json_to_structs_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), + conf = conf) + +@pytest.mark.parametrize('input_file', [ + pytest.param("int_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "float_formatted.json", + "sci_formatted.json", + pytest.param("int_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "float_formatted_strings.json", + "sci_formatted_strings.json", + pytest.param("decimal_locale_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "single_quoted_strings.json", + pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json", + "escaped_strings.json", + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/6846'))]) +@pytest.mark.parametrize('read_func', [read_json_df]) +@allow_non_gpu(*non_utc_allow) +def test_scan_json_corrected_timestamps(std_input_path, read_func, spark_tmp_table_factory, input_file): + conf = copy_and_update(_enable_all_types_json_scan_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + assert_gpu_and_cpu_are_equal_collect( + read_func(std_input_path + '/' + input_file, + StructType([StructField("data", TimestampType())]), + spark_tmp_table_factory), + conf=conf) + +@pytest.mark.parametrize('input_file', [ + pytest.param("int_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "float_formatted.json", + "sci_formatted.json", + pytest.param("int_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "float_formatted_strings.json", + "sci_formatted_strings.json", + pytest.param("decimal_locale_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "single_quoted_strings.json", + pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10535')), + "int_array_formatted.json", + "int_struct_formatted.json", + "int_mixed_array_struct_formatted.json", + "escaped_strings.json", + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/11361')), + "mixed_objects.json", + "timestamp_formatted_strings.json", + 
pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/6846'))]) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) +def test_from_json_corrected_timestamps(std_input_path, input_file): + schema = StructType([StructField("data", TimestampType())]) + conf = copy_and_update(_enable_json_to_structs_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), + conf = conf) + @pytest.mark.parametrize('input_file', [ pytest.param("int_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("float_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), @@ -940,7 +1120,11 @@ def test_from_json_doubles(std_input_path, input_file): "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same def test_scan_json_long_arrays(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -963,7 +1147,11 @@ def test_scan_json_long_arrays(std_input_path, read_func, spark_tmp_table_factor "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_long_arrays(std_input_path, input_file): schema = StructType([StructField("data", ArrayType(LongType()))]) @@ -985,7 +1173,11 @@ def test_from_json_long_arrays(std_input_path, input_file): "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same def test_scan_json_string_arrays(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -1008,7 +1200,11 @@ def test_scan_json_string_arrays(std_input_path, read_func, spark_tmp_table_fact "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_string_arrays(std_input_path, input_file): schema = StructType([StructField("data", ArrayType(StringType()))]) @@ -1030,7 +1226,11 @@ def test_from_json_string_arrays(std_input_path, input_file): pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')), pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same def test_scan_json_long_structs(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -1053,7 +1253,11 @@ def test_scan_json_long_structs(std_input_path, read_func, spark_tmp_table_facto pytest.param("int_struct_formatted.json", marks=pytest.mark.xfail(condition=is_before_spark_342(),reason='https://github.com/NVIDIA/spark-rapids/issues/10588')), pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_long_structs(std_input_path, input_file): schema = StructType([StructField("data", StructType([StructField("A", LongType()),StructField("B", LongType())]))]) @@ -1075,7 +1279,11 @@ def test_from_json_long_structs(std_input_path, input_file): "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. 
They are the same def test_scan_json_string_structs(std_input_path, read_func, spark_tmp_table_factory, input_file): assert_gpu_and_cpu_are_equal_collect( @@ -1098,7 +1306,11 @@ def test_scan_json_string_structs(std_input_path, read_func, spark_tmp_table_fac "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_string_structs(std_input_path, input_file): schema = StructType([StructField("data", StructType([StructField("A", StringType()),StructField("B", StringType())]))]) @@ -1120,8 +1332,11 @@ def test_from_json_string_structs(std_input_path, input_file): pytest.param("int_array_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/10573')), # This does not fail on 38,0 "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func.
They are the same def test_scan_json_dec_arrays(std_input_path, read_func, spark_tmp_table_factory, input_file, dt): assert_gpu_and_cpu_are_equal_collect( @@ -1145,7 +1360,11 @@ def test_scan_json_dec_arrays(std_input_path, read_func, spark_tmp_table_factory "int_struct_formatted.json", pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), - pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) + "nested_escaped_strings.json", + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) @allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 def test_from_json_dec_arrays(std_input_path, input_file, dt): schema = StructType([StructField("data", ArrayType(dt))]) @@ -1153,4 +1372,47 @@ def test_from_json_dec_arrays(std_input_path, input_file, dt): lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").select(f.col('json'), f.from_json(f.col('json'), schema)), conf =_enable_json_to_structs_conf) +@pytest.mark.parametrize('input_file', [ + pytest.param("int_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("float_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("sci_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("int_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("float_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("sci_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("decimal_locale_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("single_quoted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("boolean_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "int_array_formatted.json", + "int_struct_formatted.json", + pytest.param("int_mixed_array_struct_formatted.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("nested_escaped_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("repeated_columns.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + "mixed_objects.json", + pytest.param("timestamp_formatted_strings.json", marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260')), + pytest.param("timestamp_tz_formatted_strings.json", 
marks=pytest.mark.xfail(reason='https://github.com/rapidsai/cudf/issues/15260'))]) +@pytest.mark.parametrize('read_func', [read_json_df]) # we have done so many tests already that we don't need both read func. They are the same +def test_scan_json_mixed_struct(std_input_path, read_func, spark_tmp_table_factory, input_file): + assert_gpu_and_cpu_are_equal_collect( + read_func(std_input_path + '/' + input_file, + StructType([StructField("data", StructType([StructField("A", StringType()),StructField("B", StringType())]))]), + spark_tmp_table_factory), + conf=_enable_all_types_json_scan_conf) +@pytest.mark.parametrize('input_file, schema', [ + ("mixed_objects.json", "id INT, name STRING, tags ARRAY<STRING>, details STRUCT<age: INT, address: STRUCT<city: STRING, zip: STRING>>"), + ("mixed_objects.json", "user STRUCT<profile: STRUCT<username: STRING, email: STRING>, skills: ARRAY<STRING>, projects: ARRAY<STRUCT<name: STRING, status: STRING>>>"), + ("mixed_objects.json", "departments ARRAY<STRUCT<name: STRING, employees: ARRAY<STRUCT<id: INT, name: STRING>>>>"), + ("mixed_objects.json", "data STRUCT<numeric: INT, text: STRING, flag: BOOLEAN, details: STRUCT<timestamp: STRING, list: ARRAY<INT>>>"), + ("mixed_objects.json", "data STRUCT<numeric: INT, text: STRING, flag: BOOLEAN, details: STRUCT<timestamp: TIMESTAMP, list: ARRAY<INT>>>"), + pytest.param("mixed_objects.json", "data STRUCT<numeric: DECIMAL(10, 2), text: STRING, flag: BOOLEAN, details: STRUCT<timestamp: TIMESTAMP, list: ARRAY<INT>>>", + marks=pytest.mark.xfail(condition=is_before_spark_330(), reason='https://github.com/NVIDIA/spark-rapids/issues/11390')), + ("mixed_objects.json", "company STRUCT<departments: ARRAY<STRUCT<department_name: STRING, employees: ARRAY<STRUCT<name: STRING, role: STRING>>>>>"), + ]) +@allow_non_gpu(TEXT_INPUT_EXEC, *non_utc_allow) # https://github.com/NVIDIA/spark-rapids/issues/10453 +def test_from_json_mixed_corrected(std_input_path, input_file, schema): + conf = copy_and_update(_enable_json_to_structs_conf, {"spark.sql.legacy.timeParserPolicy": "CORRECTED"}) + assert_gpu_and_cpu_are_equal_collect( + lambda spark : read_json_as_text(spark, std_input_path + '/' + input_file, "json").selectExpr('json', + "from_json(json, '" + schema + "') as parsed"), + conf = conf) diff --git a/integration_tests/src/test/resources/escaped_strings.json b/integration_tests/src/test/resources/escaped_strings.json index 43637e14eae..ea7b0bbe8d2 100644 --- a/integration_tests/src/test/resources/escaped_strings.json +++ b/integration_tests/src/test/resources/escaped_strings.json @@ -38,3 +38,19 @@ {"data": "\u0031\u0034\u002E\u0030\u0031"} {"data": "\'TESTING\'"} {"\u0064\u0061t\u0061": "TEST"} +{"data": "This is a simple string"} +{"data": "This string contains a newline character\n"} +{"data": "This string contains a tab character\t"} +{"data": "This string contains a backslash \\"} +{"data": "This string contains a double quote \""} +{"data": "This string contains a unicode character \u00A9"} +{"data": "This string contains a smiley face \u263A"} +{"data": "This string contains a backspace character\b"} +{"data": "This string contains a form feed character\f"} +{"data": "This string contains a form feed character\u000C"} +{"data": "This string contains a carriage return\r"} +{"data": "This string contains a carriage return\u000D"} +{"data": "This string contains an illegal control character \u0007 (bell)"} +{"data": "This string contains an illegal character \u001F (unit separator)"} +{"data": "This string contains a mix of normal and escaped characters: \n \" \t \u0041"} +{"data": "This string contains an illegal control character directly: \u0001"} diff --git a/integration_tests/src/test/resources/mixed_objects.json b/integration_tests/src/test/resources/mixed_objects.json new file mode 100644 index 00000000000..91225124c6a --- /dev/null +++ b/integration_tests/src/test/resources/mixed_objects.json @@ -0,0 +1,35 @@ +{"id": 1,"name": "John","tags": ["developer", "python"],"details": {"age": 30,"address": {"city": "San Francisco","zip": "94105"}}} +{"user": { "profile": { "username": "jane_doe","email": "jane.doe@example.com"},"skills": ["java",
"spark", "sql"],"projects": [{"name": "Project X", "status": "completed"},{"name": "Project Y", "status": "ongoing"}]}} +{"departments": [{"name": "Engineering","employees": [{"id": 101, "name": "Alice"},{"id": 102, "name": "Bob"}]},{"name": "Sales","employees": [{"id": 201, "name": "Charlie"},{"id": 202, "name": "David"}]}]} +{"data": {"numeric": 123, "text": "example", "flag": true, "details": { "timestamp": "2024-08-22T10:00:00Z","list": [1, 2, 3]}}} +{"company": {"departments": [{"department_name": "HR","employees": [{"name": "Emily", "role": "Recruiter"},{"name": "Frank", "role": "HR Manager"}]},{"department_name": "IT","employees": [{"name": "Grace", "role": "Software Engineer"},{"name": "Hank", "role": "System Admin"}]}]}} +{"id": 2, "name": "Alice", "tags": ["data scientist", "R"], "details": {"age": 28, "address": {"city": "New York", "zip": "10001"}}} +{"user": {"profile": {"username": "bob_smith", "email": "bob.smith@example.com"}, "skills": ["javascript", "node.js"], "projects": [{"name": "Project A", "status": "completed"}, {"name": "Project B", "status": "pending"}]}} +{"departments": [{"name": "Marketing", "employees": [{"id": 301, "name": "Tom"}, {"id": 302, "name": "Jerry"}]}, {"name": "Finance", "employees": [{"id": 401, "name": "Mickey"}, {"id": 402, "name": "Donald"}]}]} +{"data": {"numeric": 456, "text": "sample", "flag": false, "details": {"timestamp": "2024-08-23T11:00:00Z", "list": [4, 5, 6]}}} +{"company": {"departments": [{"department_name": "Sales", "employees": [{"name": "Olivia", "role": "Sales Manager"}, {"name": "Liam", "role": "Sales Associate"}]}, {"department_name": "Engineering", "employees": [{"name": "Noah", "role": "DevOps Engineer"}, {"name": "Emma", "role": "Frontend Developer"}]}]}} +{"id": 3, "name": "Robert", "tags": ["backend", "java"], "details": {"age": 35, "address": {"city": "Seattle", "zip": "98101"}}} +{"user": {"profile": {"username": "carol_jones", "email": "carol.jones@example.com"}, "skills": ["python", "machine learning"], "projects": [{"name": "Project Z", "status": "ongoing"}, {"name": "Project W", "status": "completed"}]}} +{"departments": [{"name": "HR", "employees": [{"id": 501, "name": "Sophia"}, {"id": 502, "name": "Jackson"}]}, {"name": "IT", "employees": [{"id": 601, "name": "Aiden"}, {"id": 602, "name": "Lucas"}]}]} +{"data": {"numeric": 789, "text": "test", "flag": true, "details": {"timestamp": "2024-08-24T12:00:00Z", "list": [7, 8, 9]}}} +{"company": {"departments": [{"department_name": "Customer Support", "employees": [{"name": "Mia", "role": "Support Specialist"}, {"name": "Ethan", "role": "Support Manager"}]}, {"department_name": "Development", "employees": [{"name": "Isabella", "role": "Backend Developer"}, {"name": "James", "role": "Frontend Developer"}]}]}} +{"id": 4, "name": "Emily", "tags": ["UI/UX", "design"], "details": {"age": 29, "address": {"city": "Los Angeles", "zip": "90001"}}} +{"user": {"profile": {"username": "david_clark", "email": "david.clark@example.com"}, "skills": ["sql", "data analysis"], "projects": [{"name": "Project M", "status": "completed"}, {"name": "Project N", "status": "pending"}]}} +{"departments": [{"name": "Operations", "employees": [{"id": 701, "name": "Ella"}, {"id": 702, "name": "Liam"}]}, {"name": "Legal", "employees": [{"id": 801, "name": "Ava"}, {"id": 802, "name": "William"}]}]} +{"data": {"numeric": 321, "text": "data", "flag": false, "details": {"timestamp": "2024-08-25T13:00:00Z", "list": [3, 2, 1]}}} +{"company": {"departments": [{"department_name": "Product", "employees": 
[{"name": "Jack", "role": "Product Manager"}, {"name": "Sophia", "role": "Product Designer"}]}, {"department_name": "Marketing", "employees": [{"name": "Oliver", "role": "Content Writer"}, {"name": "Charlotte", "role": "SEO Specialist"}]}]}} +{"id": 5, "name": "Michael", "tags": ["full-stack", "ruby"], "details": {"age": 32, "address": {"city": "Austin", "zip": "73301"}}} +{"user": {"profile": {"username": "lisa_white", "email": "lisa.white@example.com"}, "skills": ["php", "web development"], "projects": [{"name": "Project O", "status": "completed"}, {"name": "Project P", "status": "ongoing"}]}} +{"departments": [{"name": "Research", "employees": [{"id": 901, "name": "Benjamin"}, {"id": 902, "name": "Mia"}]}, {"name": "Training", "employees": [{"id": 1001, "name": "Zoe"}, {"id": 1002, "name": "Ryan"}]}]} +{"data": {"numeric": 654, "text": "example", "flag": true, "details": {"timestamp": "2024-08-26T14:00:00Z", "list": [6, 5, 4]}}} +{"company": {"departments": [{"department_name": "Finance", "employees": [{"name": "Lucas", "role": "Financial Analyst"}, {"name": "Emma", "role": "Finance Director"}]}, {"department_name": "Legal", "employees": [{"name": "Liam", "role": "Legal Counsel"}, {"name": "Olivia", "role": "Paralegal"}]}]}} +{"id": 6, "name": "Sophia", "tags": ["cloud", "AWS"], "details": {"age": 31, "address": {"city": "San Diego", "zip": "92101"}}} +{"user": {"profile": {"username": "aaron_lee", "email": "aaron.lee@example.com"}, "skills": ["c++", "system programming"], "projects": [{"name": "Project Q", "status": "ongoing"}, {"name": "Project R", "status": "completed"}]}} +{"departments": [{"name": "Design", "employees": [{"id": 1101, "name": "Ella"}, {"id": 1102, "name": "Jack"}]}, {"name": "Strategy", "employees": [{"id": 1201, "name": "Mason"}, {"id": 1202, "name": "Ava"}]}]} +{"data": {"numeric": 987, "text": "test", "flag": false, "details": {"timestamp": "2024-08-27T15:00:00Z", "list": [9, 8, 7]}}} +{"company": {"departments": [{"department_name": "Sales", "employees": [{"name": "Aiden", "role": "Sales Director"}, {"name": "Emily", "role": "Sales Representative"}]}, {"department_name": "Development", "employees": [{"name": "James", "role": "Lead Developer"}, {"name": "Mia", "role": "Junior Developer"}]}]}} +{"id": 7, "name": "David", "tags": ["embedded systems", "IoT"], "details": {"age": 34, "address": {"city": "Boston", "zip": "02101"}}} +{"user": {"profile": {"username": "nina_garcia", "email": "nina.garcia@example.com"}, "skills": ["ruby", "backend"], "projects": [{"name": "Project S", "status": "completed"}, {"name": "Project T", "status": "pending"}]}} +{"departments": [{"name": "Customer Support", "employees": [{"id": 1301, "name": "Daniel"}, {"id": 1302, "name": "Sophia"}]}, {"name": "Administration", "employees": [{"id": 1401, "name": "Olivia"}, {"id": 1402, "name": "Ethan"}]}]} +{"data": {"numeric": 111, "text": "sample", "flag": true, "details": {"timestamp": "2024-08-28T16:00:00Z", "list": [1, 1, 1]}}} +{"company": {"departments": [{"department_name": "Operations", "employees": [{"name": "Ryan", "role": "Operations Manager"}, {"name": "Emma", "role": "Operations Analyst"}]}, {"department_name": "Product", "employees": [{"name": "Olivia", "role": "Product Owner"}, {"name": "Mason", "role": "Product Designer"}]}]}} diff --git a/integration_tests/src/test/resources/nested_escaped_strings.json b/integration_tests/src/test/resources/nested_escaped_strings.json new file mode 100644 index 00000000000..bd67e16aeb0 --- /dev/null +++ 
b/integration_tests/src/test/resources/nested_escaped_strings.json @@ -0,0 +1,55 @@ +{"data": {"a": "ABCDEFGHIJKLMNOPQRSTUVWXYZ"}} +{"data": {"a": "\a"}} +{"data": {"a": "\b"}} +{"data": {"a": "\c"}} +{"data": {"a": "\d"}} +{"data": {"a": "\e"}} +{"data": {"a": "\f"}} +{"data": {"a": "\g"}} +{"data": {"a": "\h"}} +{"data": {"a": "\i"}} +{"data": {"a": "\j"}} +{"data": {"a": "\k"}} +{"data": {"a": "\l"}} +{"data": {"a": "\m"}} +{"data": {"a": "\n"}} +{"data": {"a": "\o"}} +{"data": {"a": "\p"}} +{"data": {"a": "\q"}} +{"data": {"a": "\r"}} +{"data": {"a": "\s"}} +{"data": {"a": "\t"}} +{"data": {"a": "\u"}} +{"data": {"a": "\v"}} +{"data": {"a": "\w"}} +{"data": {"a": "\x"}} +{"data": {"a": "\y"}} +{"data": {"a": "\z"}} +{"data": {"a": "\\"}} +{"data": {"a": "\""}} +{"data": {"a": "\'"}} +{"data": {"a": "\u0000"}} +{"data": {"a": "\u0001"}} +{"data": {"a": "\u0002"}} +{"data": {"a": "\u0003"}} +{"data": {"a": "\u0004"}} +{"data": {"a": "This\ris\nA\ttest\u0009to\u000Asee\u000awhat\u000Bhappens"}} +{"data": {"a": "\u0031\u0034"}} +{"data": {"a": "\u0031\u0034\u002E\u0030\u0031"}} +{"data": {"a": "\'TESTING\'"}} +{"data": {"a": "This is a simple string"}} +{"data": {"a": "This string contains a newline character\n"}} +{"data": {"a": "This string contains a tab character\t"}} +{"data": {"a": "This string contains a backslash \\"}} +{"data": {"a": "This string contains a double quote \""}} +{"data": {"a": "This string contains a unicode character \u00A9"}} +{"data": {"a": "This string contains a smiley face \u263A"}} +{"data": {"a": "This string contains a backspace character\b"}} +{"data": {"a": "This string contains a form feed character\f"}} +{"data": {"a": "This string contains a form feed character\u000C"}} +{"data": {"a": "This string contains a carriage return\r"}} +{"data": {"a": "This string contains a carriage return\u000D"}} +{"data": {"a": "This string contains an illegal control character \u0007 (bell)"}} +{"data": {"a": "This string contains an illegal character \u001F (unit separator)"}} +{"data": {"a": "This string contains a mix of normal and escaped characters: \n \" \t \u0041"}} +{"data": {"a": "This string contains an illegal control character directly: \u0001"}} diff --git a/integration_tests/src/test/resources/timestamp_formatted_strings.json b/integration_tests/src/test/resources/timestamp_formatted_strings.json new file mode 100644 index 00000000000..d6399b86a0c --- /dev/null +++ b/integration_tests/src/test/resources/timestamp_formatted_strings.json @@ -0,0 +1,38 @@ +{"data": "2024-08-22"} +{"data": "2023-02-28T14:45:00Z"} +{"data": "2023-02-28T14:45:00.123Z"} +{"data": "2024-02-29T23:59:59.999Z"} +{"data": "not-a-date"} +{"data": "2024/08/22"} +{"data": "2023-02-28 14:45:00"} +{"data": "2023-02-28 14:45:00.123"} +{"data": "2020-02-25" } +{"data": "2020-02-25 14:46" } +{"data": "2020-02-25T14:46" } +{"data": "2020-02-25 14:46:00" } +{"data": "2020-02-25T14:46:00" } +{"data": "2020-02-25T14:46:00 " } +{"data": "2020-02-25 14:46:00.123" } +{"data": "2020-02-25T14:46:00.123" } +{"data": " 2020-02-25T14:46:00.123" } +{"data": "2020-02-25 14:46:00.123456" } +{"data": "2020-02-25T14:46:00.123456" } +{"data": "1900-01-01"} +{"data": "1969-12-31"} +{"data": "1970-01-01"} +{"data": "0001-01-01"} +{"data": "0999-12-31"} +{"data": "1899-12-31"} +{"data": "2023-02-28T14:45:00Z"} +{"data": "1969-07-20T20:17:40Z"} +{"data": "0001-01-01T00:00:00Z"} +{"data": "9999-12-31T23:59:59.999Z"} +{"data": "1960-04-15T12:30:45.123Z"} +{"data": "1945-05-08T00:01:00Z"} +{"data": 
"1970-01-01T00:00:00Z"} +{"data": "1865-04-09T12:00:00Z"} +{"data": "1815-06-18T10:30:00Z"} +{"data": "1582-10-15"} +{"data": "1899-12-31T23:59:59.999Z"} +{"data": "0000-12-31T23:59:59.999Z"} +{"data": "22-08-2024"} diff --git a/integration_tests/src/test/resources/timestamp_tz_formatted_strings.json b/integration_tests/src/test/resources/timestamp_tz_formatted_strings.json new file mode 100644 index 00000000000..8d36aa4a39b --- /dev/null +++ b/integration_tests/src/test/resources/timestamp_tz_formatted_strings.json @@ -0,0 +1,12 @@ +{"data": "2024-08-22T14:45:00-05:00"} +{"data": "2023-02-28T09:30:00+01:00"} +{"data": "1969-07-20T20:17:40-04:00"} +{"data": "1970-01-01T00:00:00+00:00"} +{"data": "1999-12-31T23:59:59-08:00"} +{"data": "2012-06-30T23:59:60+09:00"} +{"data": "1945-05-08T02:01:00+02:00"} +{"data": "2024-08-22T14:45:00+10:00"} +{"data": "2023-02-28T14:45:00-03:30"} +{"data": "2021-12-31T23:59:59+05:30"} +{"data": "2023-02-28T14:45:00.123-05:00"} + diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala index e8fbd7e5e61..3b7767117fb 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuJsonTuple.scala @@ -24,7 +24,7 @@ import com.nvidia.spark.rapids.shims.ShimExpression import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.types.{DataType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.vectorized.ColumnarBatch case class GpuJsonTuple(children: Seq[Expression]) extends GpuGenerator @@ -57,7 +57,6 @@ case class GpuJsonTuple(children: Seq[Expression]) extends GpuGenerator withRetry(inputBatches, splitSpillableInHalfByRows) { attempt => withResource(attempt.getColumnarBatch()) { inputBatch => val json = inputBatch.column(generatorOffset).asInstanceOf[GpuColumnVector].getBase - val schema = Array.fill[DataType](fieldExpressions.length)(StringType) val fieldInstructions = fieldExpressions.map { field => withResourceIfAllowed(field.columnarEvalAny(inputBatch)) { @@ -72,8 +71,8 @@ case class GpuJsonTuple(children: Seq[Expression]) extends GpuGenerator withResource(fieldInstructions.safeMap(field => JSONUtils.getJsonObject(json, field))) { resultCols => - val generatorCols = resultCols.safeMap(_.incRefCount).zip(schema).safeMap { - case (col, dataType) => GpuColumnVector.from(col, dataType) + val generatorCols = resultCols.safeMap(_.incRefCount).safeMap { + col => GpuColumnVector.from(col, StringType) } val nonGeneratorCols = (0 until generatorOffset).safeMap { i => inputBatch.column(i).asInstanceOf[GpuColumnVector].incRefCount