From a8d11665ebc487c86bd9908b7a1652cf52623ab9 Mon Sep 17 00:00:00 2001 From: Daniel Mesejo Date: Thu, 19 Feb 2026 15:37:43 +0100 Subject: [PATCH] feat: add to_time, to_local_time, to_date, to_char functions Additionally fix conditional on formatters (since it is *args it cannot be None) Refactor name to avoid possible collision with f. --- python/datafusion/functions.py | 76 ++++++++++++++++++++++++++++++---- python/tests/test_functions.py | 41 +++++++++++++++++- src/functions.rs | 8 ++++ 3 files changed, 115 insertions(+), 10 deletions(-) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py index 431afcc30..7e5e0434a 100644 --- a/python/datafusion/functions.py +++ b/python/datafusion/functions.py @@ -42,7 +42,6 @@ if TYPE_CHECKING: from datafusion.context import SessionContext - __all__ = [ "abs", "acos", @@ -268,7 +267,11 @@ "sum", "tan", "tanh", + "to_char", + "to_date", "to_hex", + "to_local_time", + "to_time", "to_timestamp", "to_timestamp_micros", "to_timestamp_millis", @@ -1010,6 +1013,58 @@ def now() -> Expr: return Expr(f.now()) +def to_char(arg: Expr, format: Expr) -> Expr: + """Returns a string representation of a date, time, timestamp or duration. + + For usage of ``format`` see the ``strftime`` module of the Rust chrono crate. + + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + return Expr(f.to_char(arg.expr, format.expr)) + + +def to_date(arg: Expr, *formatters: Expr) -> Expr: + """Converts a value to a date (YYYY-MM-DD). + + Supports strings, numeric and timestamp types as input. + Integers and doubles are interpreted as days since the Unix epoch. + Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') + if ``formatters`` are not provided. + + For usage of ``formatters`` see the ``strftime`` module of the Rust chrono crate.
+ + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + if not formatters: + return Expr(f.to_date(arg.expr)) + formatters = [fmt.expr for fmt in formatters] + return Expr(f.to_date(arg.expr, *formatters)) + + +def to_local_time(arg: Expr) -> Expr: + """Converts a timestamp with a timezone to a timestamp without a timezone. + + This function handles daylight saving time changes. + """ + return Expr(f.to_local_time(arg.expr)) + + +def to_time(arg: Expr, *formatters: Expr) -> Expr: + """Converts a value to a time. Supports strings and timestamps as input. + + If ``formatters`` are not provided, strings are parsed as HH:MM:SS, HH:MM or + HH:MM:SS.nnnnnnnnn. + + For usage of ``formatters`` see the ``strftime`` module of the Rust chrono crate. + + [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) + """ + if not formatters: + return Expr(f.to_time(arg.expr)) + formatters = [fmt.expr for fmt in formatters] + return Expr(f.to_time(arg.expr, *formatters)) + + def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: """Converts a string and optional formats to a ``Timestamp`` in nanoseconds. @@ -1017,10 +1072,10 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr: [Documentation here.](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) """ - if formatters is None: - return f.to_timestamp(arg.expr) + if not formatters: + return Expr(f.to_timestamp(arg.expr)) - formatters = [f.expr for f in formatters] + formatters = [fmt.expr for fmt in formatters] return Expr(f.to_timestamp(arg.expr, *formatters)) @@ -1029,7 +1084,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters.
""" - formatters = [f.expr for f in formatters] + formatters = [fmt.expr for fmt in formatters] return Expr(f.to_timestamp_millis(arg.expr, *formatters)) @@ -1038,7 +1093,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters = [f.expr for f in formatters] + formatters = [fmt.expr for fmt in formatters] return Expr(f.to_timestamp_micros(arg.expr, *formatters)) @@ -1047,7 +1102,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters = [f.expr for f in formatters] + formatters = [fmt.expr for fmt in formatters] return Expr(f.to_timestamp_nanos(arg.expr, *formatters)) @@ -1056,13 +1111,13 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr: See :py:func:`to_timestamp` for a description on how to use formatters. """ - formatters = [f.expr for f in formatters] + formatters = [fmt.expr for fmt in formatters] return Expr(f.to_timestamp_seconds(arg.expr, *formatters)) def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr: """Converts a string and optional formats to a Unixtime.""" - args = [f.expr for f in format_arguments] + args = [fmt.expr for fmt in format_arguments] return Expr(f.to_unixtime(string.expr, *args)) @@ -1071,6 +1126,9 @@ def current_date() -> Expr: return Expr(f.current_date()) +today = current_date + + def current_time() -> Expr: """Returns current UTC time as a Time64 value.""" return Expr(f.current_time()) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 7b3332ed7..25f77465e 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
import math -from datetime import datetime, timezone +from datetime import date, datetime, time, timezone import numpy as np import pyarrow as pa @@ -952,6 +952,12 @@ def test_temporal_functions(df): f.to_timestamp_nanos( literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f") ), + f.to_time(literal("12:30:45")), + f.to_time(literal("12-30-45"), literal("%H-%M-%S")), + f.to_date(literal("2017-05-31")), + f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")), + f.to_local_time(column("d")), + f.to_char(column("d"), literal("%d-%m-%Y")), ) result = df.collect() assert len(result) == 1 @@ -1026,6 +1032,39 @@ def test_temporal_functions(df): [datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3, type=pa.timestamp("ns"), ) + assert result.column(17) == pa.array( + [time(12, 30, 45)] * 3, + type=pa.time64("ns"), + ) + assert result.column(18) == pa.array( + [time(12, 30, 45)] * 3, + type=pa.time64("ns"), + ) + assert result.column(19) == pa.array( + [date(2017, 5, 31)] * 3, + type=pa.date32(), + ) + assert result.column(20) == pa.array( + [date(2017, 5, 31)] * 3, + type=pa.date32(), + ) + assert result.column(21) == pa.array( + [ + datetime(2022, 12, 31, tzinfo=DEFAULT_TZ), + datetime(2027, 6, 26, tzinfo=DEFAULT_TZ), + datetime(2020, 7, 2, tzinfo=DEFAULT_TZ), + ], + type=pa.timestamp("us"), + ) + + assert result.column(22) == pa.array( + [ + "31-12-2022", + "26-06-2027", + "02-07-2020", + ], + type=pa.string(), + ) def test_arrow_cast(df): diff --git a/src/functions.rs b/src/functions.rs index 90b3a0a4b..c32134054 100644 --- a/src/functions.rs +++ b/src/functions.rs @@ -601,6 +601,9 @@ expr_fn!( "Converts the number to its equivalent hexadecimal representation." 
); expr_fn!(now); +expr_fn_vec!(to_date); +expr_fn_vec!(to_local_time); +expr_fn_vec!(to_time); expr_fn_vec!(to_timestamp); expr_fn_vec!(to_timestamp_millis); expr_fn_vec!(to_timestamp_nanos); @@ -613,6 +616,7 @@ expr_fn!(date_part, part date); expr_fn!(date_trunc, part date); expr_fn!(date_bin, stride source origin); expr_fn!(make_date, year month day); +expr_fn!(to_char, datetime format); expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted."); expr_fn_vec!( @@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_wrapped(wrap_pyfunction!(tan))?; m.add_wrapped(wrap_pyfunction!(tanh))?; m.add_wrapped(wrap_pyfunction!(to_hex))?; + m.add_wrapped(wrap_pyfunction!(to_char))?; + m.add_wrapped(wrap_pyfunction!(to_date))?; + m.add_wrapped(wrap_pyfunction!(to_local_time))?; + m.add_wrapped(wrap_pyfunction!(to_time))?; m.add_wrapped(wrap_pyfunction!(to_timestamp))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?; m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;