Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 67 additions & 9 deletions python/datafusion/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@

if TYPE_CHECKING:
from datafusion.context import SessionContext

__all__ = [
"abs",
"acos",
Expand Down Expand Up @@ -268,7 +267,11 @@
"sum",
"tan",
"tanh",
"to_char",
"to_date",
"to_hex",
"to_local_time",
"to_time",
"to_timestamp",
"to_timestamp_micros",
"to_timestamp_millis",
Expand Down Expand Up @@ -1010,17 +1013,69 @@ def now() -> Expr:
return Expr(f.now())


def to_char(arg: Expr, format: Expr) -> Expr:
    """Render a date, time, timestamp or duration as a string.

    Args:
        arg: Expression producing the temporal value to render.
        format: Expression supplying the format, written with the ``strftime``
            specifiers of the Rust ``chrono`` crate.

    Returns:
        An expression wrapping the underlying ``to_char`` call.

    The supported specifiers are listed at
    https://docs.rs/chrono/latest/chrono/format/strftime/index.html
    """
    value, pattern = arg.expr, format.expr
    return Expr(f.to_char(value, pattern))


def to_date(arg: Expr, *formatters: Expr) -> Expr:
    """Convert a value to a date (YYYY-MM-DD).

    Strings, numeric values and timestamps are accepted as input. Integers
    and doubles are read as days since the unix epoch. When no ``formatters``
    are supplied, strings are parsed as YYYY-MM-DD (e.g. '2023-07-20').

    Args:
        arg: Expression producing the value to convert.
        *formatters: Optional format expressions, written with the
            ``strftime`` specifiers of the Rust ``chrono`` crate.

    Returns:
        An expression wrapping the underlying ``to_date`` call.

    The supported specifiers are listed at
    https://docs.rs/chrono/latest/chrono/format/strftime/index.html
    """
    # With no formatters this tuple is empty, so the call below receives
    # ``arg`` alone, exactly as a dedicated single-argument call would.
    patterns = tuple(fmt.expr for fmt in formatters)
    return Expr(f.to_date(arg.expr, *patterns))


def to_local_time(arg: Expr) -> Expr:
    """Turn a timezone-aware timestamp into one without a timezone.

    Daylight saving time changes are taken into account by the conversion.

    Args:
        arg: Expression producing the timestamp to convert.

    Returns:
        An expression wrapping the underlying ``to_local_time`` call.
    """
    converted = f.to_local_time(arg.expr)
    return Expr(converted)


def to_time(arg: Expr, *formatters: Expr) -> Expr:
    """Convert a value to a time.

    Strings and timestamps are accepted as input. When no ``formatters`` are
    supplied, strings are parsed as HH:MM:SS, HH:MM or HH:MM:SS.nnnnnnnnn.

    Args:
        arg: Expression producing the value to convert.
        *formatters: Optional format expressions, written with the
            ``strftime`` specifiers of the Rust ``chrono`` crate.

    Returns:
        An expression wrapping the underlying ``to_time`` call.

    The supported specifiers are listed at
    https://docs.rs/chrono/latest/chrono/format/strftime/index.html
    """
    # With no formatters this tuple is empty, so the call below receives
    # ``arg`` alone, exactly as a dedicated single-argument call would.
    patterns = tuple(fmt.expr for fmt in formatters)
    return Expr(f.to_time(arg.expr, *patterns))


def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
    """Converts a string and optional formats to a ``Timestamp`` in nanoseconds.

    Args:
        arg: Expression producing the value to convert.
        *formatters: Optional format expressions, written with the
            ``strftime`` specifiers of the Rust ``chrono`` crate.

    Returns:
        An expression wrapping the underlying ``to_timestamp`` call. The
        result is always wrapped in ``Expr``, including when no formatters
        are given.

    The supported specifiers are listed at
    https://docs.rs/chrono/latest/chrono/format/strftime/index.html
    """
    # ``*formatters`` is always a tuple, never ``None``, so emptiness is the
    # only case to handle; an empty list simply adds no extra arguments.
    # Each formatter is unwrapped exactly once, and the loop variable is
    # ``fmt`` so the module alias ``f`` is never reused as a loop name.
    raw_formatters = [fmt.expr for fmt in formatters]
    return Expr(f.to_timestamp(arg.expr, *raw_formatters))


Expand All @@ -1029,7 +1084,7 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_millis(arg.expr, *formatters))


Expand All @@ -1038,7 +1093,7 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_micros(arg.expr, *formatters))


Expand All @@ -1047,7 +1102,7 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_nanos(arg.expr, *formatters))


Expand All @@ -1056,13 +1111,13 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:

See :py:func:`to_timestamp` for a description on how to use formatters.
"""
formatters = [f.expr for f in formatters]
formatters = [fmt.expr for fmt in formatters]
return Expr(f.to_timestamp_seconds(arg.expr, *formatters))


def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
    """Converts a string and optional formats to a Unixtime.

    Args:
        string: Expression producing the value to convert.
        *format_arguments: Optional format expressions passed through to the
            underlying ``to_unixtime`` call after ``string``.

    Returns:
        An expression wrapping the underlying ``to_unixtime`` call.
    """
    # Unwrap the formats once; the loop variable is ``fmt`` so the module
    # alias ``f`` is never reused as a loop name.
    raw_formats = [fmt.expr for fmt in format_arguments]
    return Expr(f.to_unixtime(string.expr, *raw_formats))


Expand All @@ -1071,6 +1126,9 @@ def current_date() -> Expr:
return Expr(f.current_date())


# ``today`` is a second name bound to the same function object as ``current_date``.
today = current_date


def current_time() -> Expr:
    """Build an expression for the current UTC time, typed as Time64.

    Returns:
        An expression wrapping the underlying ``current_time`` call.
    """
    raw = f.current_time()
    return Expr(raw)
Expand Down
41 changes: 40 additions & 1 deletion python/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
import math
from datetime import datetime, timezone
from datetime import date, datetime, time, timezone

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -952,6 +952,12 @@ def test_temporal_functions(df):
f.to_timestamp_nanos(
literal("2023-09-07 05:06:14.523952000"), literal("%Y-%m-%d %H:%M:%S.%f")
),
f.to_time(literal("12:30:45")),
f.to_time(literal("12-30-45"), literal("%H-%M-%S")),
f.to_date(literal("2017-05-31")),
f.to_date(literal("2017-05-31"), literal("%Y-%m-%d")),
f.to_local_time(column("d")),
f.to_char(column("d"), literal("%d-%m-%Y")),
)
result = df.collect()
assert len(result) == 1
Expand Down Expand Up @@ -1026,6 +1032,39 @@ def test_temporal_functions(df):
[datetime(2023, 9, 7, 5, 6, 14, 523952, tzinfo=DEFAULT_TZ)] * 3,
type=pa.timestamp("ns"),
)
assert result.column(17) == pa.array(
[time(12, 30, 45)] * 3,
type=pa.time64("ns"),
)
assert result.column(18) == pa.array(
[time(12, 30, 45)] * 3,
type=pa.time64("ns"),
)
assert result.column(19) == pa.array(
[date(2017, 5, 31)] * 3,
type=pa.date32(),
)
assert result.column(20) == pa.array(
[date(2017, 5, 31)] * 3,
type=pa.date32(),
)
assert result.column(21) == pa.array(
[
datetime(2022, 12, 31, tzinfo=DEFAULT_TZ),
datetime(2027, 6, 26, tzinfo=DEFAULT_TZ),
datetime(2020, 7, 2, tzinfo=DEFAULT_TZ),
],
type=pa.timestamp("us"),
)

assert result.column(22) == pa.array(
[
"31-12-2022",
"26-06-2027",
"02-07-2020",
],
type=pa.string(),
)


def test_arrow_cast(df):
Expand Down
8 changes: 8 additions & 0 deletions src/functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,9 @@ expr_fn!(
"Converts the number to its equivalent hexadecimal representation."
);
// Date/time conversion bindings generated by macro.
// NOTE(review): the `expr_fn_vec!` entries presumably accept a variable-length
// list of expressions (a value plus optional formats) — confirm against the
// macro definition, which is not visible in this view.
expr_fn!(now);
expr_fn_vec!(to_date);
expr_fn_vec!(to_local_time);
expr_fn_vec!(to_time);
expr_fn_vec!(to_timestamp);
expr_fn_vec!(to_timestamp_millis);
expr_fn_vec!(to_timestamp_nanos);
Expand All @@ -613,6 +616,7 @@ expr_fn!(date_part, part date);
// NOTE(review): the identifiers after each function name look like that
// function's parameter names (e.g. `year month day`) — confirm against the
// `expr_fn!` definition, which is not visible in this view.
expr_fn!(date_trunc, part date);
expr_fn!(date_bin, stride source origin);
expr_fn!(make_date, year month day);
expr_fn!(to_char, datetime format);

expr_fn!(translate, string from to, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
expr_fn_vec!(
Expand Down Expand Up @@ -1045,6 +1049,10 @@ pub(crate) fn init_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(tan))?;
m.add_wrapped(wrap_pyfunction!(tanh))?;
m.add_wrapped(wrap_pyfunction!(to_hex))?;
m.add_wrapped(wrap_pyfunction!(to_char))?;
m.add_wrapped(wrap_pyfunction!(to_date))?;
m.add_wrapped(wrap_pyfunction!(to_local_time))?;
m.add_wrapped(wrap_pyfunction!(to_time))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_millis))?;
m.add_wrapped(wrap_pyfunction!(to_timestamp_nanos))?;
Expand Down