Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions r/sedonadb/NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method("$",sedonadb_table_ref)
S3method("$<-",savvy_sedonadb__sealed)
S3method("[[<-",savvy_sedonadb__sealed)
S3method(.DollarNames,sedonadb_fns)
Expand Down Expand Up @@ -43,16 +44,19 @@ S3method(print,sedonadb_join_by)
S3method(print,sedonadb_join_select)
S3method(print,sedonadb_join_select_default)
export(.fns)
export(.tables)
export(as_sd_expr)
export(as_sedonadb_dataframe)
export(as_sedonadb_literal)
export(is_sd_expr)
export(sd_anti_join)
export(sd_arrange)
export(sd_collect)
export(sd_compute)
export(sd_configure_proj)
export(sd_connect)
export(sd_count)
export(sd_cross_join)
export(sd_ctx_drop_view)
export(sd_ctx_read_parquet)
export(sd_ctx_read_sf)
Expand All @@ -72,16 +76,22 @@ export(sd_expr_negative)
export(sd_expr_parse_binary)
export(sd_expr_scalar_function)
export(sd_filter)
export(sd_full_join)
export(sd_group_by)
export(sd_inner_join)
export(sd_join)
export(sd_join_by)
export(sd_join_intersects)
export(sd_join_select)
export(sd_join_select_default)
export(sd_left_join)
export(sd_preview)
export(sd_read_parquet)
export(sd_read_sf)
export(sd_register_udf)
export(sd_right_join)
export(sd_select)
export(sd_semi_join)
export(sd_sql)
export(sd_summarise)
export(sd_summarize)
Expand Down
166 changes: 166 additions & 0 deletions r/sedonadb/R/dataframe-join.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

#' Join two SedonaDB DataFrames
#'
#' Combine two dataframes with a join. Conditions built with [sd_join_by()]
#' refer to columns of the left table as `x$column` and to columns of the
#' right table as `y$column`.
#'
#' @details
#' `sd_left_join()`, `sd_right_join()`, `sd_inner_join()` and `sd_full_join()`
#' call `sd_join()` with `join_type` fixed to `"left"`, `"right"`, `"inner"`
#' and `"full"` respectively. `sd_semi_join()` and `sd_anti_join()` use
#' `"leftsemi"` and `"leftanti"`. `sd_cross_join()` always passes an empty
#' character vector as `by` with `join_type = "inner"`.
#'
#' @param x The left dataframe
#' @param y The right dataframe. It is converted with
#'   [as_sedonadb_dataframe()] using the context of `x`.
#' @param by Join specification. One of:
#'   - A `sedonadb_join_by` object from [sd_join_by()]
#'   - A character vector of column names to join on in both tables
#'   - A named character vector mapping left-table column names to
#'     right-table column names, e.g. `c(x_val = "y_val")`
#'   - `NULL` for a natural join on columns with matching names
#' @param join_type The type of join to perform. One of "inner", "left",
#'   "right", "full", "leftsemi", "rightsemi", "leftanti", "rightanti",
#'   "leftmark", or "rightmark".
#' @param select Post-join column selection. One of
#'   - `NULL` for no modification, which may result in duplicate
#'     (unqualified) column names. Such a column may still be referred to
#'     with a qualifier in advanced usage using [sd_expr_column()].
#'   - [sd_join_select_default()] for dplyr-like behaviour (equi-join keys
#'     removed, intersecting names suffixed)
#'   - [sd_join_select()] for a custom selection
#' @param keep Use `TRUE` to keep all key columns in an equijoin or spatial
#'   join. This is only applied when `select` is [sd_join_select_default()].
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' df1 <- data.frame(x = letters[1:10], y = 1:10)
#' df2 <- data.frame(y = 10:1, z = LETTERS[1:10])
#' df1 |> sd_join(df2)
#'
sd_join <- function(
  x,
  y,
  by = NULL,
  join_type = "inner",
  select = sd_join_select_default(),
  keep = NULL
) {
  # Both inputs must live in the same context, so the right side is converted
  # using the context of the left side.
  left <- as_sedonadb_dataframe(x)
  right <- as_sedonadb_dataframe(y, ctx = left$ctx)

  # The expression context is built from both schemas; it is used here to
  # resolve `by` and again below for the post-join projection.
  expr_ctx <- sd_join_expr_ctx(
    infer_nanoarrow_schema(left),
    infer_nanoarrow_schema(right),
    ctx = left$ctx
  )
  conditions <- sd_build_join_conditions(expr_ctx, by, ctx = left$ctx)

  joined_df <- left$df$join(
    right$df,
    conditions,
    join_type,
    left_alias = "x",
    right_alias = "y"
  )
  joined <- new_sedonadb_dataframe(left$ctx, joined_df)

  # select = NULL: hand back the join output without any projection
  if (is.null(select)) {
    return(joined)
  }

  if (inherits(select, "sedonadb_join_select_default")) {
    # Default select: remove duplicate equijoin keys, apply suffixes
    post_join_exprs <- sd_build_default_select(
      expr_ctx,
      conditions,
      select$suffix,
      join_type,
      keep = keep
    )
  } else if (inherits(select, "sedonadb_join_select")) {
    # Custom select: evaluate the user-supplied expressions
    post_join_exprs <- sd_eval_join_select_exprs(select, expr_ctx)
  } else {
    stop(
      "`select` must be NULL, sd_join_select_default(), or sd_join_select()",
      call. = FALSE
    )
  }

  # A NULL result from either helper means no extra projection is needed
  if (is.null(post_join_exprs)) {
    return(joined)
  }

  sd_transmute(joined, !!!post_join_exprs)
}

#' @rdname sd_join
#' @export
sd_left_join <- function(
  x,
  y,
  by = NULL,
  select = sd_join_select_default(),
  keep = NULL
) {
  # Thin wrapper: forwards everything to sd_join() with join_type = "left"
  sd_join(
    x,
    y,
    by = by,
    join_type = "left",
    select = select,
    keep = keep
  )
}

#' @rdname sd_join
#' @export
sd_right_join <- function(
  x,
  y,
  by = NULL,
  select = sd_join_select_default(),
  keep = NULL
) {
  # Thin wrapper: forwards everything to sd_join() with join_type = "right"
  sd_join(
    x,
    y,
    by = by,
    join_type = "right",
    select = select,
    keep = keep
  )
}

#' @rdname sd_join
#' @export
sd_inner_join <- function(
  x,
  y,
  by = NULL,
  select = sd_join_select_default(),
  keep = NULL
) {
  # Thin wrapper: forwards everything to sd_join() with join_type = "inner"
  sd_join(
    x,
    y,
    by = by,
    join_type = "inner",
    select = select,
    keep = keep
  )
}

#' @rdname sd_join
#' @export
sd_full_join <- function(
  x,
  y,
  by = NULL,
  select = sd_join_select_default(),
  keep = NULL
) {
  # Thin wrapper: forwards everything to sd_join() with join_type = "full"
  sd_join(
    x,
    y,
    by = by,
    join_type = "full",
    select = select,
    keep = keep
  )
}

#' @rdname sd_join
#' @export
sd_semi_join <- function(x, y, by = NULL) {
  # Forwards to sd_join() with join_type = "leftsemi"; `select` and `keep`
  # are left at sd_join()'s defaults.
  sd_join(
    x,
    y,
    join_type = "leftsemi",
    by = by
  )
}

#' @rdname sd_join
#' @export
sd_anti_join <- function(x, y, by = NULL) {
  # Forwards to sd_join() with join_type = "leftanti"; `select` and `keep`
  # are left at sd_join()'s defaults.
  sd_join(
    x,
    y,
    join_type = "leftanti",
    by = by
  )
}

#' @rdname sd_join
#' @export
sd_cross_join <- function(x, y, by = NULL, select = sd_join_select_default()) {
  # `by` is never forwarded: this function always passes an empty character
  # vector to sd_join(). Warn instead of silently dropping a caller-supplied
  # value so that a mistaken `sd_cross_join(x, y, by = "id")` is noticed.
  # NOTE(review): `by` presumably exists only for signature parity with the
  # other join helpers -- confirm whether it should be deprecated.
  if (!is.null(by)) {
    warning("`by` is ignored by sd_cross_join()", call. = FALSE)
  }

  # NOTE(review): an empty `by` is assumed to produce no join conditions
  # (i.e. every row of `x` paired with every row of `y`) -- confirm against
  # sd_build_join_conditions().
  sd_join(
    x,
    y,
    by = character(),
    join_type = "inner",
    select = select
  )
}
80 changes: 0 additions & 80 deletions r/sedonadb/R/dataframe.R
Original file line number Diff line number Diff line change
Expand Up @@ -496,86 +496,6 @@ sd_summarize <- function(.data, ..., .env = parent.frame()) {
sd_summarise(.data, ..., .env = .env)
}

#' Join two SedonaDB DataFrames
#'
#' Combine two dataframes with a join. Conditions built with [sd_join_by()]
#' refer to columns of the left table as `x$column` and to columns of the
#' right table as `y$column`.
#'
#' @param x The left dataframe
#' @param y The right dataframe (will use the same context as x)
#' @param by Join specification. One of:
#'   - A `sedonadb_join_by` object from [sd_join_by()]
#'   - A character vector of column names to join on in both tables
#'   - A named character vector mapping left-table column names to
#'     right-table column names, e.g. `c(x_val = "y_val")`
#'   - `NULL` for a natural join on columns with matching names
#' @param join_type The type of join to perform. One of "inner", "left",
#'   "right", "full", "leftsemi", "rightsemi", "leftanti", "rightanti",
#'   "leftmark", or "rightmark".
#' @param select Post-join column selection. One of
#'   - `NULL` for no modification, which may result in duplicate
#'     (unqualified) column names. Such a column may still be referred to
#'     with a qualifier in advanced usage using [sd_expr_column()].
#'   - [sd_join_select_default()] for dplyr-like behaviour (equi-join keys
#'     removed, intersecting names suffixed)
#'   - [sd_join_select()] for a custom selection
#'
#' @returns An object of class sedonadb_dataframe
#' @export
#'
#' @examples
#' df1 <- data.frame(x = letters[1:10], y = 1:10)
#' df2 <- data.frame(y = 10:1, z = LETTERS[1:10])
#' df1 |> sd_join(df2)
#'
sd_join <- function(
  x,
  y,
  by = NULL,
  join_type = "inner",
  select = sd_join_select_default()
) {
  # The right side is converted using the context of the left side
  lhs <- as_sedonadb_dataframe(x)
  rhs <- as_sedonadb_dataframe(y, ctx = lhs$ctx)

  lhs_schema <- infer_nanoarrow_schema(lhs)
  rhs_schema <- infer_nanoarrow_schema(rhs)
  expr_ctx <- sd_join_expr_ctx(lhs_schema, rhs_schema, ctx = lhs$ctx)
  on_exprs <- sd_build_join_conditions(expr_ctx, by, ctx = lhs$ctx)

  raw_join <- lhs$df$join(
    rhs$df,
    on_exprs,
    join_type,
    left_alias = "x",
    right_alias = "y"
  )
  result <- new_sedonadb_dataframe(lhs$ctx, raw_join)

  # Work out the post-join projection; it stays NULL when `select` is NULL
  columns <- NULL
  if (!is.null(select)) {
    if (inherits(select, "sedonadb_join_select_default")) {
      # Default select: remove duplicate equijoin keys, apply suffixes
      columns <- sd_build_default_select(
        expr_ctx,
        on_exprs,
        select$suffix,
        join_type
      )
    } else if (inherits(select, "sedonadb_join_select")) {
      # Custom select: evaluate the user-supplied expressions
      columns <- sd_eval_join_select_exprs(select, expr_ctx)
    } else {
      stop(
        "`select` must be NULL, sd_join_select_default(), or sd_join_select()",
        call. = FALSE
      )
    }
  }

  # A NULL projection means the join output is returned as-is
  if (!is.null(columns)) {
    return(sd_transmute(result, !!!columns))
  }

  result
}

#' Write DataFrame to (Geo)Parquet files
#'
#' Write this DataFrame to one or more (Geo)Parquet files. For input that contains
Expand Down
Loading
Loading