4545import pyarrow as pa
4646
4747if TYPE_CHECKING :
48- from collections .abc import Callable
48+ from collections .abc import Callable , Iterable
4949
5050from datafusion ._internal import functions as f
5151from datafusion .common import NullTreatment
5555 SortExpr ,
5656 SortKey ,
5757 coerce_to_expr ,
58+ coerce_to_expr_list ,
5859 coerce_to_expr_or_none ,
5960 expr_list_to_raw_expr_list ,
6061 sort_list_to_raw_sort_list ,
@@ -2391,11 +2392,11 @@ def date_format(arg: Expr, formatter: Expr | str) -> Expr:
23912392 return to_char (arg , formatter )
23922393
23932394
def _unwrap_exprs(args: Iterable[Expr]) -> list:
    """Return the raw ``.expr`` attribute of every item in ``args``.

    Args:
        args: Any iterable of expression wrappers, e.g. the tuple captured by
            a ``*args`` parameter or a list of already-coerced expressions.

    Returns:
        A list holding each item's ``.expr``, in iteration order. An empty
        iterable yields an empty list.
    """
    return [arg.expr for arg in args]
23962397
23972398
2398- def to_date (arg : Expr , * formatters : Expr ) -> Expr :
2399+ def to_date (arg : Expr , * formatters : Expr | str ) -> Expr :
23992400 """Converts a value to a date (YYYY-MM-DD).
24002401
24012402 Supports strings, numeric and timestamp types as input.
@@ -2414,8 +2415,16 @@ def to_date(arg: Expr, *formatters: Expr) -> Expr:
24142415 ... dfn.functions.to_date(dfn.col("a")).alias("dt"))
24152416 >>> str(result.collect_column("dt")[0].as_py())
24162417 '2021-07-20'
2418+
2419+ Pass a format string as a bare ``str``:
2420+
2421+ >>> df = ctx.from_pydict({"a": ["20-07-2021"]})
2422+ >>> result = df.select(
2423+ ... dfn.functions.to_date(dfn.col("a"), "%d-%m-%Y").alias("dt"))
2424+ >>> str(result.collect_column("dt")[0].as_py())
2425+ '2021-07-20'
24172426 """
2418- return Expr (f .to_date (arg .expr , * _unwrap_exprs (formatters )))
2427+ return Expr (f .to_date (arg .expr , * _unwrap_exprs (coerce_to_expr_list ( formatters ) )))
24192428
24202429
24212430def to_local_time (* args : Expr ) -> Expr :
@@ -2426,7 +2435,7 @@ def to_local_time(*args: Expr) -> Expr:
24262435 return Expr (f .to_local_time (* _unwrap_exprs (args )))
24272436
24282437
2429- def to_time (arg : Expr , * formatters : Expr ) -> Expr :
2438+ def to_time (arg : Expr , * formatters : Expr | str ) -> Expr :
24302439 """Converts a value to a time. Supports strings and timestamps as input.
24312440
24322441 If ``formatters`` is not provided strings are parsed as HH:MM:SS, HH:MM or
@@ -2443,11 +2452,19 @@ def to_time(arg: Expr, *formatters: Expr) -> Expr:
24432452 ... dfn.functions.to_time(dfn.col("a")).alias("t"))
24442453 >>> str(result.collect_column("t")[0].as_py())
24452454 '14:30:00'
2455+
2456+ Pass a format string as a bare ``str``:
2457+
2458+ >>> df = ctx.from_pydict({"a": ["14h30m00s"]})
2459+ >>> result = df.select(
2460+ ... dfn.functions.to_time(dfn.col("a"), "%Hh%Mm%Ss").alias("t"))
2461+ >>> str(result.collect_column("t")[0].as_py())
2462+ '14:30:00'
24462463 """
2447- return Expr (f .to_time (arg .expr , * _unwrap_exprs (formatters )))
2464+ return Expr (f .to_time (arg .expr , * _unwrap_exprs (coerce_to_expr_list ( formatters ) )))
24482465
24492466
2450- def to_timestamp (arg : Expr , * formatters : Expr ) -> Expr :
2467+ def to_timestamp (arg : Expr , * formatters : Expr | str ) -> Expr :
24512468 """Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
24522469
24532470 For usage of ``formatters`` see the ``strftime`` module of the Rust chrono crate.
@@ -2464,11 +2481,24 @@ def to_timestamp(arg: Expr, *formatters: Expr) -> Expr:
24642481 ... )
24652482 >>> str(result.collect_column("ts")[0].as_py())
24662483 '2021-01-01 00:00:00'
2484+
2485+ Pass a format string as a bare ``str``:
2486+
2487+ >>> df = ctx.from_pydict({"a": ["01/01/2021 00:00:00"]})
2488+ >>> result = df.select(
2489+ ... dfn.functions.to_timestamp(
2490+ ... dfn.col("a"), "%d/%m/%Y %H:%M:%S"
2491+ ... ).alias("ts")
2492+ ... )
2493+ >>> str(result.collect_column("ts")[0].as_py())
2494+ '2021-01-01 00:00:00'
24672495 """
2468- return Expr (f .to_timestamp (arg .expr , * _unwrap_exprs (formatters )))
2496+ return Expr (
2497+ f .to_timestamp (arg .expr , * _unwrap_exprs (coerce_to_expr_list (formatters )))
2498+ )
24692499
24702500
2471- def to_timestamp_millis (arg : Expr , * formatters : Expr ) -> Expr :
2501+ def to_timestamp_millis (arg : Expr , * formatters : Expr | str ) -> Expr :
24722502 """Converts a string and optional formats to a ``Timestamp`` in milliseconds.
24732503
24742504 See :py:func:`to_timestamp` for a description on how to use formatters.
@@ -2483,11 +2513,24 @@ def to_timestamp_millis(arg: Expr, *formatters: Expr) -> Expr:
24832513 ... )
24842514 >>> str(result.collect_column("ts")[0].as_py())
24852515 '2021-01-01 00:00:00'
2516+
2517+ Pass a format string as a bare ``str``:
2518+
2519+ >>> df = ctx.from_pydict({"a": ["01/01/2021 00:00:00"]})
2520+ >>> result = df.select(
2521+ ... dfn.functions.to_timestamp_millis(
2522+ ... dfn.col("a"), "%d/%m/%Y %H:%M:%S"
2523+ ... ).alias("ts")
2524+ ... )
2525+ >>> str(result.collect_column("ts")[0].as_py())
2526+ '2021-01-01 00:00:00'
24862527 """
2487- return Expr (f .to_timestamp_millis (arg .expr , * _unwrap_exprs (formatters )))
2528+ return Expr (
2529+ f .to_timestamp_millis (arg .expr , * _unwrap_exprs (coerce_to_expr_list (formatters )))
2530+ )
24882531
24892532
2490- def to_timestamp_micros (arg : Expr , * formatters : Expr ) -> Expr :
2533+ def to_timestamp_micros (arg : Expr , * formatters : Expr | str ) -> Expr :
24912534 """Converts a string and optional formats to a ``Timestamp`` in microseconds.
24922535
24932536 See :py:func:`to_timestamp` for a description on how to use formatters.
@@ -2502,11 +2545,24 @@ def to_timestamp_micros(arg: Expr, *formatters: Expr) -> Expr:
25022545 ... )
25032546 >>> str(result.collect_column("ts")[0].as_py())
25042547 '2021-01-01 00:00:00'
2548+
2549+ Pass a format string as a bare ``str``:
2550+
2551+ >>> df = ctx.from_pydict({"a": ["01/01/2021 00:00:00"]})
2552+ >>> result = df.select(
2553+ ... dfn.functions.to_timestamp_micros(
2554+ ... dfn.col("a"), "%d/%m/%Y %H:%M:%S"
2555+ ... ).alias("ts")
2556+ ... )
2557+ >>> str(result.collect_column("ts")[0].as_py())
2558+ '2021-01-01 00:00:00'
25052559 """
2506- return Expr (f .to_timestamp_micros (arg .expr , * _unwrap_exprs (formatters )))
2560+ return Expr (
2561+ f .to_timestamp_micros (arg .expr , * _unwrap_exprs (coerce_to_expr_list (formatters )))
2562+ )
25072563
25082564
2509- def to_timestamp_nanos (arg : Expr , * formatters : Expr ) -> Expr :
2565+ def to_timestamp_nanos (arg : Expr , * formatters : Expr | str ) -> Expr :
25102566 """Converts a string and optional formats to a ``Timestamp`` in nanoseconds.
25112567
25122568 See :py:func:`to_timestamp` for a description on how to use formatters.
@@ -2521,11 +2577,24 @@ def to_timestamp_nanos(arg: Expr, *formatters: Expr) -> Expr:
25212577 ... )
25222578 >>> str(result.collect_column("ts")[0].as_py())
25232579 '2021-01-01 00:00:00'
2580+
2581+ Pass a format string as a bare ``str``:
2582+
2583+ >>> df = ctx.from_pydict({"a": ["01/01/2021 00:00:00"]})
2584+ >>> result = df.select(
2585+ ... dfn.functions.to_timestamp_nanos(
2586+ ... dfn.col("a"), "%d/%m/%Y %H:%M:%S"
2587+ ... ).alias("ts")
2588+ ... )
2589+ >>> str(result.collect_column("ts")[0].as_py())
2590+ '2021-01-01 00:00:00'
25242591 """
2525- return Expr (f .to_timestamp_nanos (arg .expr , * _unwrap_exprs (formatters )))
2592+ return Expr (
2593+ f .to_timestamp_nanos (arg .expr , * _unwrap_exprs (coerce_to_expr_list (formatters )))
2594+ )
25262595
25272596
2528- def to_timestamp_seconds (arg : Expr , * formatters : Expr ) -> Expr :
2597+ def to_timestamp_seconds (arg : Expr , * formatters : Expr | str ) -> Expr :
25292598 """Converts a string and optional formats to a ``Timestamp`` in seconds.
25302599
25312600 See :py:func:`to_timestamp` for a description on how to use formatters.
@@ -2540,11 +2609,26 @@ def to_timestamp_seconds(arg: Expr, *formatters: Expr) -> Expr:
25402609 ... )
25412610 >>> str(result.collect_column("ts")[0].as_py())
25422611 '2021-01-01 00:00:00'
2612+
2613+ Pass a format string as a bare ``str``:
2614+
2615+ >>> df = ctx.from_pydict({"a": ["01/01/2021 00:00:00"]})
2616+ >>> result = df.select(
2617+ ... dfn.functions.to_timestamp_seconds(
2618+ ... dfn.col("a"), "%d/%m/%Y %H:%M:%S"
2619+ ... ).alias("ts")
2620+ ... )
2621+ >>> str(result.collect_column("ts")[0].as_py())
2622+ '2021-01-01 00:00:00'
25432623 """
2544- return Expr (f .to_timestamp_seconds (arg .expr , * _unwrap_exprs (formatters )))
2624+ return Expr (
2625+ f .to_timestamp_seconds (
2626+ arg .expr , * _unwrap_exprs (coerce_to_expr_list (formatters ))
2627+ )
2628+ )
25452629
25462630
2547- def to_unixtime (string : Expr , * format_arguments : Expr ) -> Expr :
2631+ def to_unixtime (string : Expr , * format_arguments : Expr | str ) -> Expr :
25482632 """Converts a string and optional formats to a Unixtime.
25492633
25502634 Examples:
@@ -2553,8 +2637,23 @@ def to_unixtime(string: Expr, *format_arguments: Expr) -> Expr:
25532637 >>> result = df.select(dfn.functions.to_unixtime(dfn.col("a")).alias("u"))
25542638 >>> result.collect_column("u")[0].as_py()
25552639 0
2640+
2641+ Pass a format string as a bare ``str``:
2642+
2643+ >>> df = ctx.from_pydict({"a": ["01/01/1970 00:00:00"]})
2644+ >>> result = df.select(
2645+ ... dfn.functions.to_unixtime(
2646+ ... dfn.col("a"), "%d/%m/%Y %H:%M:%S"
2647+ ... ).alias("u")
2648+ ... )
2649+ >>> result.collect_column("u")[0].as_py()
2650+ 0
25562651 """
2557- return Expr (f .to_unixtime (string .expr , * _unwrap_exprs (format_arguments )))
2652+ return Expr (
2653+ f .to_unixtime (
2654+ string .expr , * _unwrap_exprs (coerce_to_expr_list (format_arguments ))
2655+ )
2656+ )
25582657
25592658
25602659def current_date () -> Expr :
@@ -2676,28 +2775,43 @@ def datetrunc(part: Expr | str, date: Expr) -> Expr:
26762775 return _date_trunc (part , date , "datetrunc" )
26772776
26782777
def date_bin(stride: Expr | str, source: Expr | str, origin: Expr | str) -> Expr:
    """Coerces an arbitrary timestamp to the start of the nearest specified interval.

    Each argument may be given either as an :py:class:`Expr` or as a bare
    ``str``; a bare ``str`` is wrapped with :py:meth:`Expr.string_literal`
    before being handed to the underlying function.

    Args:
        stride: Width of the interval to bin into, e.g. ``"15 minutes"``.
        source: Timestamp expression (or literal) to bin.
        origin: Timestamp the intervals are aligned to.

    Examples:
        >>> ctx = dfn.SessionContext()
        >>> df = ctx.from_pydict({"timestamp": ['2021-07-15 12:34:56', '2021-01-01']})
        >>> result = df.select(
        ...     dfn.functions.date_bin(
        ...         "15 minutes",
        ...         dfn.col("timestamp"),
        ...         "2001-01-01 00:00:00",
        ...     ).alias("b")
        ... )
        >>> str(result.collect_column("b")[0].as_py())
        '2021-07-15 12:30:00'
        >>> str(result.collect_column("b")[1].as_py())
        '2021-01-01 00:00:00'

        ``source`` may also be a bare literal:

        >>> result = df.select(
        ...     dfn.functions.date_bin(
        ...         "15 minutes", "2021-07-15 12:34:56", "2001-01-01 00:00:00"
        ...     ).alias("b")
        ... )
        >>> str(result.collect_column("b")[0].as_py())
        '2021-07-15 12:30:00'
    """
    # date_bin's planner coerces Utf8 (not Utf8View) literals to Interval/Timestamp,
    # so wrap bare strs via string_literal to force Utf8.
    stride, source, origin = (
        Expr.string_literal(value) if isinstance(value, str) else value
        for value in (stride, source, origin)
    )
    return Expr(f.date_bin(stride.expr, source.expr, origin.expr))
26982812
26992813
2700- def make_date (year : Expr , month : Expr , day : Expr ) -> Expr :
2814+ def make_date (year : Expr | int , month : Expr | int , day : Expr | int ) -> Expr :
27012815 """Make a date from year, month and day component parts.
27022816
27032817 Examples:
@@ -2709,11 +2823,22 @@ def make_date(year: Expr, month: Expr, day: Expr) -> Expr:
27092823 ... dfn.col("d")).alias("dt"))
27102824 >>> result.collect_column("dt")[0].as_py()
27112825 datetime.date(2024, 1, 15)
2826+
2827+ Pass bare ints for any component:
2828+
2829+ >>> df = ctx.from_pydict({"y": [2024]})
2830+ >>> result = df.select(
2831+ ... dfn.functions.make_date(dfn.col("y"), 1, 15).alias("dt"))
2832+ >>> result.collect_column("dt")[0].as_py()
2833+ datetime.date(2024, 1, 15)
27122834 """
2835+ year = coerce_to_expr (year )
2836+ month = coerce_to_expr (month )
2837+ day = coerce_to_expr (day )
27132838 return Expr (f .make_date (year .expr , month .expr , day .expr ))
27142839
27152840
2716- def make_time (hour : Expr , minute : Expr , second : Expr ) -> Expr :
2841+ def make_time (hour : Expr | int , minute : Expr | int , second : Expr | int ) -> Expr :
27172842 """Make a time from hour, minute and second component parts.
27182843
27192844 Examples:
@@ -2724,7 +2849,18 @@ def make_time(hour: Expr, minute: Expr, second: Expr) -> Expr:
27242849 ... dfn.col("s")).alias("t"))
27252850 >>> result.collect_column("t")[0].as_py()
27262851 datetime.time(12, 30)
2852+
2853+ Pass bare ints for any component:
2854+
2855+ >>> df = ctx.from_pydict({"h": [12]})
2856+ >>> result = df.select(
2857+ ... dfn.functions.make_time(dfn.col("h"), 30, 0).alias("t"))
2858+ >>> result.collect_column("t")[0].as_py()
2859+ datetime.time(12, 30)
27272860 """
2861+ hour = coerce_to_expr (hour )
2862+ minute = coerce_to_expr (minute )
2863+ second = coerce_to_expr (second )
27282864 return Expr (f .make_time (hour .expr , minute .expr , second .expr ))
27292865
27302866
0 commit comments