@@ -170,28 +170,32 @@ ghci> D.innerJoin ["key"] df other
170170@
171171-}
innerJoin :: [T.Text] -> DataFrame -> DataFrame -> DataFrame
-- Arguments: the key column names, then the left frame, then the right frame.
innerJoin cs left right
    -- If 'D.null' holds for either frame the result is 'D.empty'.
    -- NOTE(review): leftJoin and fullOuterJoin additionally test
    -- @D.nRows x == 0@ in their shortcuts; this function does not. Confirm
    -- whether the difference is intentional.
    | D.null right || D.null left = D.empty
    | otherwise =
        let
            csSet = S.fromList cs
            leftRows = fst (D.dimensions left)
            rightRows = fst (D.dimensions right)

            leftKeyIdxs = keyColIndices csSet left
            rightKeyIdxs = keyColIndices csSet right
            leftHashes = D.computeRowHashes leftKeyIdxs left
            rightHashes = D.computeRowHashes rightKeyIdxs right

            -- The row count of the smaller side selects the kernel.
            buildRows = min leftRows rightRows
            (leftIxs, rightIxs)
                | buildRows > joinStrategyThreshold =
                    sortMergeInnerKernel leftHashes rightHashes
                | rightRows <= leftRows =
                    -- Build on right (smaller or equal), probe with left
                    hashInnerKernel leftHashes rightHashes
                | otherwise =
                    -- Build on left (smaller), probe with right, swap result
                    let (!rIxs, !lIxs) = hashInnerKernel rightHashes leftHashes
                    in (lIxs, rIxs)
        in
            assembleInner csSet left right leftIxs rightIxs
195199
196200{- | Hash-based inner join kernel.
197201Builds compact index on @buildHashes@ (second arg), probes with
@@ -369,23 +373,28 @@ ghci> D.leftJoin ["key"] df other
369373@
370374-}
leftJoin :: [T.Text] -> DataFrame -> DataFrame -> DataFrame
-- Arguments: the key column names, then the left frame, then the right frame.
leftJoin cs left right
    -- Nothing to match against: @left@ is returned unchanged, so no columns
    -- from @right@ are added on this path.
    | D.null right || D.nRows right == 0 = left
    -- No left rows to keep: the result is 'D.empty'.
    | D.null left || D.nRows left == 0 = D.empty
    | otherwise =
        let
            csSet = S.fromList cs
            rightRows = fst (D.dimensions right)

            leftKeyIdxs = keyColIndices csSet left
            rightKeyIdxs = keyColIndices csSet right
            leftHashes = D.computeRowHashes leftKeyIdxs left
            rightHashes = D.computeRowHashes rightKeyIdxs right

            -- Right is always the build side for left join
            (leftIxs, rightIxs)
                | rightRows > joinStrategyThreshold =
                    sortMergeLeftKernel leftHashes rightHashes
                | otherwise =
                    hashLeftKernel leftHashes rightHashes
        in
            -- rightIxs uses -1 as sentinel for "no match"
            assembleLeft csSet left right leftIxs rightIxs
389398
390399{- | Hash-based left join kernel.
391400Returns @(leftExpandedIndices, rightExpandedIndices)@ where
@@ -574,24 +583,29 @@ rightJoin cs left right = leftJoin cs right left
574583
fullOuterJoin ::
    [T.Text] -> DataFrame -> DataFrame -> DataFrame
-- Arguments: the key column names, then the left frame, then the right frame.
fullOuterJoin cs left right
    -- When one side is null or has zero rows, the other side is returned
    -- unchanged; no merged column set is built on these paths. The @right@
    -- check comes first, so two empty inputs yield @left@.
    | D.null right || D.nRows right == 0 = left
    | D.null left || D.nRows left == 0 = right
    | otherwise =
        let
            csSet = S.fromList cs
            leftRows = fst (D.dimensions left)
            rightRows = fst (D.dimensions right)

            leftKeyIdxs = keyColIndices csSet left
            rightKeyIdxs = keyColIndices csSet right
            leftHashes = D.computeRowHashes leftKeyIdxs left
            rightHashes = D.computeRowHashes rightKeyIdxs right

            -- Both sides can have nulls in full outer
            (leftIxs, rightIxs)
                | max leftRows rightRows > joinStrategyThreshold =
                    sortMergeFullOuterKernel leftHashes rightHashes
                | otherwise =
                    hashFullOuterKernel leftHashes rightHashes
        in
            -- Both index vectors use -1 as sentinel
            assembleFullOuter csSet left right leftIxs rightIxs
595609
596610{- | Hash-based full outer join kernel.
597611Builds compact indices on both sides.
0 commit comments