From 83076946462b48d96d6a54e80834fe6ad9f0ac4a Mon Sep 17 00:00:00 2001 From: Jun Sekine Date: Sun, 17 May 2026 02:45:05 +0900 Subject: [PATCH 1/4] Add JMH benchmark subprojects for v1.10.0 vs v2.0.0 comparison (#172) Scaffolds four JVM-only subprojects under benchmark/ to visualize the performance characteristics of v1.10.0 (Maven artifact, com.github.doyaaaaaken.*) against v2.0.0 (this branch, com.jsoizo.*) on identical workloads. Subprojects: - benchmark/shared: deterministic data generation (CsvDataGen / DatasetSpec / DataStats) and environment probe. Depends only on kotlin-stdlib so it stays out of every benchmark classpath as a library. - benchmark/v1: JMH source set whose only kotlin-csv on classpath is com.jsoizo:kotlin-csv-jvm:1.10.0. Covers readAll(String/InputStream/File), iterative Sequence over File, readAllWithHeader, writeAll(OutputStream/File). - benchmark/v2: JMH source set whose only kotlin-csv on classpath is the current project. Mirrors the v1 workloads on the v2 API and adds V2BackendBenchmarks comparing java.io vs kotlinx-io paths. - benchmark/parity: JUnit subproject that intentionally puts both v1 and v2 on the test classpath (FQCNs do not collide) and asserts row-by-row equality on the HARD dataset for readAll/readAllWithHeader/writeAll. Classpath isolation is achieved by separating v1 and v2 into different Gradle resolution scopes; this stops Gradle from collapsing kotlinx-coroutines (and other transitive deps) to a single version across the two artifacts. The resolved jmhRuntimeClasspath was verified to contain v1 only on the v1 side and the v2 project only on the v2 side. JMH defaults: warmup=5, iterations=5, fork=2, modes=throughput+avgt, jvmArgs=[-Xms2g,-Xmx2g], JDK 21 toolchain. -Pbench.profile=large|gcprof|stackprof overrides the defaults for the long-running LARGE dataset and profiler runs. -Pjmh.include / -Pjmh.warmupIterations / -Pjmh.iterations / -Pjmh.fork allow per-invocation overrides for smoke runs; when -Pbench.profile is also given, the profile's warmup, iteration, and fork settings take precedence because the build scripts apply the profile last.
--- benchmark/parity/build.gradle.kts | 19 +++ .../kotlincsv/bench/parity/AssertRowsEqual.kt | 21 ++++ .../kotlincsv/bench/parity/ParityFixtures.kt | 10 ++ .../bench/parity/ParityHeaderTest.kt | 32 ++++++ .../kotlincsv/bench/parity/ParityReadTest.kt | 58 ++++++++++ .../kotlincsv/bench/parity/ParityWriteTest.kt | 29 +++++ benchmark/shared/build.gradle.kts | 7 ++ .../kotlincsv/bench/shared/CsvDataGen.kt | 108 ++++++++++++++++++ .../kotlincsv/bench/shared/DataStats.kt | 31 +++++ .../kotlincsv/bench/shared/DatasetSpec.kt | 15 +++ .../jsoizo/kotlincsv/bench/shared/EnvProbe.kt | 49 ++++++++ benchmark/v1/build.gradle.kts | 54 +++++++++ .../kotlincsv/bench/v1/ReadBenchmarksV1.kt | 43 +++++++ .../kotlincsv/bench/v1/WriteBenchmarksV1.kt | 30 +++++ .../bench/v1/state/FileInputStateV1.kt | 55 +++++++++ .../bench/v1/state/GeneratedDataStateV1.kt | 45 ++++++++ .../bench/v1/state/OutputSinkStateV1.kt | 44 +++++++ benchmark/v2/build.gradle.kts | 55 +++++++++ .../kotlincsv/bench/v2/ReadBenchmarksV2.kt | 47 ++++++++ .../kotlincsv/bench/v2/V2BackendBenchmarks.kt | 50 ++++++++ .../kotlincsv/bench/v2/WriteBenchmarksV2.kt | 32 ++++++ .../bench/v2/state/FileInputStateV2.kt | 60 ++++++++++ .../bench/v2/state/GeneratedDataStateV2.kt | 45 ++++++++ .../bench/v2/state/OutputSinkStateV2.kt | 47 ++++++++ build.gradle.kts | 2 + gradle/libs.versions.toml | 8 ++ settings.gradle.kts | 2 + 27 files changed, 998 insertions(+) create mode 100644 benchmark/parity/build.gradle.kts create mode 100644 benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/AssertRowsEqual.kt create mode 100644 benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityFixtures.kt create mode 100644 benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityHeaderTest.kt create mode 100644 benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityReadTest.kt create mode 100644 benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityWriteTest.kt create 
mode 100644 benchmark/shared/build.gradle.kts create mode 100644 benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/CsvDataGen.kt create mode 100644 benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DataStats.kt create mode 100644 benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DatasetSpec.kt create mode 100644 benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/EnvProbe.kt create mode 100644 benchmark/v1/build.gradle.kts create mode 100644 benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/ReadBenchmarksV1.kt create mode 100644 benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/WriteBenchmarksV1.kt create mode 100644 benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/FileInputStateV1.kt create mode 100644 benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/GeneratedDataStateV1.kt create mode 100644 benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/OutputSinkStateV1.kt create mode 100644 benchmark/v2/build.gradle.kts create mode 100644 benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/ReadBenchmarksV2.kt create mode 100644 benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/V2BackendBenchmarks.kt create mode 100644 benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/WriteBenchmarksV2.kt create mode 100644 benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/FileInputStateV2.kt create mode 100644 benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/GeneratedDataStateV2.kt create mode 100644 benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/OutputSinkStateV2.kt diff --git a/benchmark/parity/build.gradle.kts b/benchmark/parity/build.gradle.kts new file mode 100644 index 0000000..b96855a --- /dev/null +++ b/benchmark/parity/build.gradle.kts @@ -0,0 +1,19 @@ +plugins { + alias(libs.plugins.kotlinJvm) +} + +kotlin { + jvmToolchain(21) +} + +dependencies { + testImplementation(project(":")) + 
testImplementation(project(":benchmark:shared")) + testImplementation(libs.kotlincsv.v1.jvm) + testImplementation(libs.bundles.kotest) + testImplementation(libs.kotlin.test.junit5) +} + +tasks.withType().configureEach { + useJUnitPlatform() +} diff --git a/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/AssertRowsEqual.kt b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/AssertRowsEqual.kt new file mode 100644 index 0000000..f278fa6 --- /dev/null +++ b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/AssertRowsEqual.kt @@ -0,0 +1,21 @@ +package com.jsoizo.kotlincsv.bench.parity + +import kotlin.test.assertEquals + +fun assertRowsEqual(expected: List>, actual: List>) { + assertEquals(expected.size, actual.size, "row count mismatch") + for (i in expected.indices) { + val e = expected[i] + val a = actual[i] + if (e.size != a.size) { + throw AssertionError("row $i col count mismatch: expected=${e.size} actual=${a.size}\n expected=$e\n actual=$a") + } + for (j in e.indices) { + if (e[j] != a[j]) { + throw AssertionError("row $i col $j mismatch: expected=${e[j].quoted()} actual=${a[j].quoted()}") + } + } + } +} + +private fun String.quoted(): String = "\"" + replace("\\", "\\\\").replace("\"", "\\\"") + "\"" diff --git a/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityFixtures.kt b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityFixtures.kt new file mode 100644 index 0000000..f321788 --- /dev/null +++ b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityFixtures.kt @@ -0,0 +1,10 @@ +package com.jsoizo.kotlincsv.bench.parity + +import com.jsoizo.kotlincsv.bench.shared.CsvDataGen +import com.jsoizo.kotlincsv.bench.shared.DatasetSpec + +object ParityFixtures { + val hard: CsvDataGen.Generated by lazy { + CsvDataGen.generate(DatasetSpec.HARD, CsvDataGen.DEFAULT_SEED) + } +} diff --git 
a/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityHeaderTest.kt b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityHeaderTest.kt new file mode 100644 index 0000000..dc2ac95 --- /dev/null +++ b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityHeaderTest.kt @@ -0,0 +1,32 @@ +package com.jsoizo.kotlincsv.bench.parity + +import com.github.doyaaaaaken.kotlincsv.dsl.csvReader as v1csvReader +import com.jsoizo.kotlincsv.csvReader as v2csvReader +import com.jsoizo.kotlincsv.reader.withHeader +import org.junit.jupiter.api.Test +import kotlin.test.assertEquals + +class ParityHeaderTest { + + @Test + fun readAllWithHeader_string_v1_v2_parity() { + val headers = (0 until ParityFixtures.hard.rows[0].size) + .joinToString(",") { "col$it" } + val body = ParityFixtures.hard.csvText + val text = headers + "\r\n" + body + + val v1: List> = v1csvReader().readAllWithHeader(text) + val v2Rows: List> = + v2csvReader().readAll(text).asSequence().withHeader().toList() + + assertEquals(v1.size, v2Rows.size, "row count mismatch") + for (i in v1.indices) { + val e = v1[i] + val a = v2Rows[i] + assertEquals(e.keys.toList(), a.keys.toList(), "row $i header keys mismatch") + for (k in e.keys) { + assertEquals(e[k], a[k], "row $i key '$k' mismatch") + } + } + } +} diff --git a/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityReadTest.kt b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityReadTest.kt new file mode 100644 index 0000000..6b2c595 --- /dev/null +++ b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityReadTest.kt @@ -0,0 +1,58 @@ +package com.jsoizo.kotlincsv.bench.parity + +import com.github.doyaaaaaken.kotlincsv.dsl.csvReader as v1csvReader +import com.jsoizo.kotlincsv.csvReader as v2csvReader +import com.jsoizo.kotlincsv.reader.readAll as v2readAll +import com.jsoizo.kotlincsv.reader.readAllFromFile as v2readAllFromFile +import 
org.junit.jupiter.api.AfterAll +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.TestInstance +import java.io.ByteArrayInputStream +import java.io.File + +@TestInstance(TestInstance.Lifecycle.PER_CLASS) +class ParityReadTest { + + private lateinit var tempFile: File + + @BeforeAll + fun setUp() { + tempFile = File.createTempFile("parity-hard-", ".csv").apply { + writeBytes(ParityFixtures.hard.csvBytes) + } + } + + @AfterAll + fun tearDown() { + tempFile.delete() + } + + @Test + fun readAll_string_v1_v2_parity() { + val v1 = v1csvReader().readAll(ParityFixtures.hard.csvText) + val v2 = v2csvReader().readAll(ParityFixtures.hard.csvText) + assertRowsEqual(v1, v2) + assertRowsEqual(ParityFixtures.hard.rows, v2) + } + + @Test + fun readAll_inputStream_v1_v2_parity() { + val v1 = ByteArrayInputStream(ParityFixtures.hard.csvBytes).use { + v1csvReader().readAll(it) + } + val v2 = ByteArrayInputStream(ParityFixtures.hard.csvBytes).use { + v2csvReader().v2readAll(it) + } + assertRowsEqual(v1, v2) + assertRowsEqual(ParityFixtures.hard.rows, v2) + } + + @Test + fun readAll_file_v1_v2_parity() { + val v1 = v1csvReader().readAll(tempFile) + val v2 = v2csvReader().v2readAllFromFile(tempFile) + assertRowsEqual(v1, v2) + assertRowsEqual(ParityFixtures.hard.rows, v2) + } +} diff --git a/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityWriteTest.kt b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityWriteTest.kt new file mode 100644 index 0000000..87d2100 --- /dev/null +++ b/benchmark/parity/src/test/kotlin/com/jsoizo/kotlincsv/bench/parity/ParityWriteTest.kt @@ -0,0 +1,29 @@ +package com.jsoizo.kotlincsv.bench.parity + +import com.github.doyaaaaaken.kotlincsv.dsl.csvReader as v1csvReader +import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter as v1csvWriter +import com.jsoizo.kotlincsv.csvReader as v2csvReader +import com.jsoizo.kotlincsv.csvWriter as v2csvWriter +import 
com.jsoizo.kotlincsv.writer.write as v2write +import org.junit.jupiter.api.Test +import java.io.ByteArrayOutputStream + +class ParityWriteTest { + + @Test + fun writeAll_outputStream_then_v1_reparse_matches_input() { + val rows = ParityFixtures.hard.rows + + val v1Out = ByteArrayOutputStream() + v1csvWriter().writeAll(rows, v1Out) + val v1Reparsed = v1csvReader().readAll(v1Out.toString(Charsets.UTF_8)) + + val v2Out = ByteArrayOutputStream() + v2csvWriter().v2write(rows.asSequence(), v2Out, "UTF-8") + val v2Reparsed = v2csvReader().readAll(v2Out.toString(Charsets.UTF_8)) + + assertRowsEqual(rows, v1Reparsed) + assertRowsEqual(rows, v2Reparsed) + assertRowsEqual(v1Reparsed, v2Reparsed) + } +} diff --git a/benchmark/shared/build.gradle.kts b/benchmark/shared/build.gradle.kts new file mode 100644 index 0000000..2106ece --- /dev/null +++ b/benchmark/shared/build.gradle.kts @@ -0,0 +1,7 @@ +plugins { + alias(libs.plugins.kotlinJvm) +} + +kotlin { + jvmToolchain(21) +} diff --git a/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/CsvDataGen.kt b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/CsvDataGen.kt new file mode 100644 index 0000000..40fa4ab --- /dev/null +++ b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/CsvDataGen.kt @@ -0,0 +1,108 @@ +package com.jsoizo.kotlincsv.bench.shared + +import kotlin.random.Random + +object CsvDataGen { + const val DEFAULT_SEED: Long = 42L + + private val asciiPrintable: CharArray = + (('a'..'z') + ('A'..'Z') + ('0'..'9') + " .-_/".toList()).toCharArray() + + private val hiragana: CharArray = ('ぁ'..'ん').toList().toCharArray() + + data class Generated( + val rows: List>, + val csvText: String, + val csvBytes: ByteArray, + val stats: DataStats, + ) + + fun generate(spec: DatasetSpec, seed: Long = DEFAULT_SEED): Generated { + val random = Random(seed) + val rows = ArrayList>(spec.rows) + var totalLen = 0L + var maxLen = 0 + var quoteCells = 0 + var commaCells = 0 + var 
newlineCells = 0 + var multiByteCells = 0 + for (r in 0 until spec.rows) { + val row = ArrayList(spec.cols) + for (c in 0 until spec.cols) { + val len = 4 + random.nextInt(12) // 4..15 + val sb = StringBuilder(len) + var hasQuote = false + var hasComma = false + var hasNewline = false + var hasMultiByte = false + for (i in 0 until len) { + val pickMulti = spec.utf8MultiByteRate > 0.0 && random.nextDouble() < spec.utf8MultiByteRate / 2.0 + val ch: Char = when { + pickMulti -> { + hasMultiByte = true + hiragana[random.nextInt(hiragana.size)] + } + else -> asciiPrintable[random.nextInt(asciiPrintable.size)] + } + sb.append(ch) + } + if (spec.embeddedCommaRate > 0.0 && random.nextDouble() < spec.embeddedCommaRate) { + sb.insert(random.nextInt(sb.length + 1), ',') + hasComma = true + } + if (spec.embeddedNewlineRate > 0.0 && random.nextDouble() < spec.embeddedNewlineRate) { + sb.insert(random.nextInt(sb.length + 1), '\n') + hasNewline = true + } + if (spec.quoteRate > 0.0 && random.nextDouble() < spec.quoteRate) { + sb.insert(random.nextInt(sb.length + 1), '"') + hasQuote = true + } + val cell = sb.toString() + row.add(cell) + totalLen += cell.length + if (cell.length > maxLen) maxLen = cell.length + if (hasQuote) quoteCells++ + if (hasComma) commaCells++ + if (hasNewline) newlineCells++ + if (hasMultiByte) multiByteCells++ + } + rows.add(row) + } + val csvText = encode(rows) + val csvBytes = csvText.toByteArray(Charsets.UTF_8) + val totalCells = spec.rows.toLong() * spec.cols + val stats = DataStats( + dataset = spec.name, + rows = spec.rows, + cols = spec.cols, + seed = seed, + avgCellLen = if (totalCells == 0L) 0.0 else totalLen.toDouble() / totalCells, + maxCellLen = maxLen, + quoteRate = if (totalCells == 0L) 0.0 else quoteCells.toDouble() / totalCells, + embeddedCommaRate = if (totalCells == 0L) 0.0 else commaCells.toDouble() / totalCells, + embeddedNewlineRate = if (totalCells == 0L) 0.0 else newlineCells.toDouble() / totalCells, + utf8MultiByteCellRate = if 
(totalCells == 0L) 0.0 else multiByteCells.toDouble() / totalCells, + totalBytes = csvBytes.size.toLong(), + ) + return Generated(rows, csvText, csvBytes, stats) + } + + private fun encode(rows: List>): String { + val sb = StringBuilder() + for (row in rows) { + var first = true + for (cell in row) { + if (!first) sb.append(',') + first = false + if (cell.contains('"') || cell.contains(',') || cell.contains('\n') || cell.contains('\r')) { + sb.append('"').append(cell.replace("\"", "\"\"")).append('"') + } else { + sb.append(cell) + } + } + sb.append("\r\n") + } + return sb.toString() + } +} diff --git a/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DataStats.kt b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DataStats.kt new file mode 100644 index 0000000..5d68393 --- /dev/null +++ b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DataStats.kt @@ -0,0 +1,31 @@ +package com.jsoizo.kotlincsv.bench.shared + +data class DataStats( + val dataset: String, + val rows: Int, + val cols: Int, + val seed: Long, + val avgCellLen: Double, + val maxCellLen: Int, + val quoteRate: Double, + val embeddedCommaRate: Double, + val embeddedNewlineRate: Double, + val utf8MultiByteCellRate: Double, + val totalBytes: Long, +) { + fun toJson(): String = buildString { + append("{") + append("\"dataset\":\"").append(dataset).append("\",") + append("\"rows\":").append(rows).append(",") + append("\"cols\":").append(cols).append(",") + append("\"seed\":").append(seed).append(",") + append("\"avgCellLen\":").append(avgCellLen).append(",") + append("\"maxCellLen\":").append(maxCellLen).append(",") + append("\"quoteRate\":").append(quoteRate).append(",") + append("\"embeddedCommaRate\":").append(embeddedCommaRate).append(",") + append("\"embeddedNewlineRate\":").append(embeddedNewlineRate).append(",") + append("\"utf8MultiByteCellRate\":").append(utf8MultiByteCellRate).append(",") + append("\"totalBytes\":").append(totalBytes) + 
append("}") + } +} diff --git a/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DatasetSpec.kt b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DatasetSpec.kt new file mode 100644 index 0000000..a845f80 --- /dev/null +++ b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/DatasetSpec.kt @@ -0,0 +1,15 @@ +package com.jsoizo.kotlincsv.bench.shared + +enum class DatasetSpec( + val rows: Int, + val cols: Int, + val quoteRate: Double, + val embeddedCommaRate: Double, + val embeddedNewlineRate: Double, + val utf8MultiByteRate: Double, +) { + SMALL(rows = 1_000, cols = 10, quoteRate = 0.0, embeddedCommaRate = 0.0, embeddedNewlineRate = 0.0, utf8MultiByteRate = 0.0), + MEDIUM(rows = 100_000, cols = 20, quoteRate = 0.0, embeddedCommaRate = 0.0, embeddedNewlineRate = 0.0, utf8MultiByteRate = 0.0), + LARGE(rows = 1_000_000, cols = 10, quoteRate = 0.0, embeddedCommaRate = 0.0, embeddedNewlineRate = 0.0, utf8MultiByteRate = 0.0), + HARD(rows = 10_000, cols = 10, quoteRate = 0.30, embeddedCommaRate = 0.10, embeddedNewlineRate = 0.05, utf8MultiByteRate = 0.20), +} diff --git a/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/EnvProbe.kt b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/EnvProbe.kt new file mode 100644 index 0000000..78100f2 --- /dev/null +++ b/benchmark/shared/src/main/kotlin/com/jsoizo/kotlincsv/bench/shared/EnvProbe.kt @@ -0,0 +1,49 @@ +package com.jsoizo.kotlincsv.bench.shared + +object EnvProbe { + data class Snapshot( + val jdkVersion: String, + val jdkVendor: String, + val osName: String, + val osVersion: String, + val osArch: String, + val availableProcessors: Int, + val totalMemoryMB: Long, + val kotlinStdlibVersion: String, + val benchSide: String, + val timestampEpochMs: Long, + ) { + fun toJson(): String = buildString { + append("{") + append("\"jdkVersion\":\"").append(escape(jdkVersion)).append("\",") + 
append("\"jdkVendor\":\"").append(escape(jdkVendor)).append("\",") + append("\"osName\":\"").append(escape(osName)).append("\",") + append("\"osVersion\":\"").append(escape(osVersion)).append("\",") + append("\"osArch\":\"").append(escape(osArch)).append("\",") + append("\"availableProcessors\":").append(availableProcessors).append(",") + append("\"totalMemoryMB\":").append(totalMemoryMB).append(",") + append("\"kotlinStdlibVersion\":\"").append(escape(kotlinStdlibVersion)).append("\",") + append("\"benchSide\":\"").append(escape(benchSide)).append("\",") + append("\"timestampEpochMs\":").append(timestampEpochMs) + append("}") + } + + private fun escape(s: String): String = s.replace("\\", "\\\\").replace("\"", "\\\"") + } + + fun snapshot(benchSide: String): Snapshot { + val runtime = Runtime.getRuntime() + return Snapshot( + jdkVersion = System.getProperty("java.version") ?: "", + jdkVendor = System.getProperty("java.vendor") ?: "", + osName = System.getProperty("os.name") ?: "", + osVersion = System.getProperty("os.version") ?: "", + osArch = System.getProperty("os.arch") ?: "", + availableProcessors = runtime.availableProcessors(), + totalMemoryMB = runtime.totalMemory() / (1024L * 1024L), + kotlinStdlibVersion = KotlinVersion.CURRENT.toString(), + benchSide = benchSide, + timestampEpochMs = System.currentTimeMillis(), + ) + } +} diff --git a/benchmark/v1/build.gradle.kts b/benchmark/v1/build.gradle.kts new file mode 100644 index 0000000..6436416 --- /dev/null +++ b/benchmark/v1/build.gradle.kts @@ -0,0 +1,54 @@ +plugins { + alias(libs.plugins.kotlinJvm) + alias(libs.plugins.jmh) +} + +kotlin { + jvmToolchain(21) +} + +dependencies { + jmh(libs.kotlincsv.v1.jvm) + jmh(project(":benchmark:shared")) +} + +jmh { + warmupIterations.set(5) + iterations.set(5) + fork.set(2) + timeOnIteration.set("10s") + warmup.set("10s") + benchmarkMode.set(listOf("thrpt", "avgt")) + resultFormat.set("JSON") + jvmArgs.set(listOf("-Xms2g", "-Xmx2g")) + 
duplicateClassesStrategy.set(DuplicatesStrategy.EXCLUDE) + + val include = (project.findProperty("jmh.include") as String?)?.let { listOf(it) } + if (include != null) includes.set(include) + (project.findProperty("jmh.warmupIterations") as String?)?.toInt()?.let { warmupIterations.set(it) } + (project.findProperty("jmh.iterations") as String?)?.toInt()?.let { iterations.set(it) } + (project.findProperty("jmh.fork") as String?)?.toInt()?.let { fork.set(it) } + + when (project.findProperty("bench.profile") as String?) { + "large" -> { + warmupIterations.set(2) + iterations.set(3) + fork.set(1) + benchmarkMode.set(listOf("thrpt")) + } + "gcprof" -> { + warmupIterations.set(3) + iterations.set(3) + fork.set(2) + benchmarkMode.set(listOf("thrpt")) + profilers.set(listOf("gc")) + } + "stackprof" -> { + warmupIterations.set(2) + iterations.set(2) + fork.set(1) + benchmarkMode.set(listOf("thrpt")) + profilers.set(listOf("stack")) + } + } +} diff --git a/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/ReadBenchmarksV1.kt b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/ReadBenchmarksV1.kt new file mode 100644 index 0000000..31c2e0c --- /dev/null +++ b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/ReadBenchmarksV1.kt @@ -0,0 +1,43 @@ +package com.jsoizo.kotlincsv.bench.v1 + +import com.github.doyaaaaaken.kotlincsv.dsl.csvReader +import com.jsoizo.kotlincsv.bench.v1.state.FileInputStateLargeV1 +import com.jsoizo.kotlincsv.bench.v1.state.FileInputStateV1 +import com.jsoizo.kotlincsv.bench.v1.state.GeneratedDataStateV1 +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.BenchmarkMode +import org.openjdk.jmh.annotations.Mode +import org.openjdk.jmh.annotations.OutputTimeUnit +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import java.io.ByteArrayInputStream +import java.util.concurrent.TimeUnit + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput, Mode.AverageTime) 
+@OutputTimeUnit(TimeUnit.MILLISECONDS) +open class ReadBenchmarksV1 { + + @Benchmark + fun readAllString(data: GeneratedDataStateV1): List> = + csvReader().readAll(data.csvText) + + @Benchmark + fun readAllInputStream(data: GeneratedDataStateV1): List> = + ByteArrayInputStream(data.csvBytes).use { csvReader().readAll(it) } + + @Benchmark + fun readAllFile(state: FileInputStateV1): List> = + csvReader().readAll(state.file) + + @Benchmark + fun sequenceIterativeFile(state: FileInputStateLargeV1): Int = + csvReader().open(state.file) { readAllAsSequence().count() } + + @Benchmark + fun readAllWithHeader(data: GeneratedDataStateV1): List> { + val headers = (0 until data.rows[0].size).joinToString(",") { "col$it" } + val text = headers + "\r\n" + data.csvText + return csvReader().readAllWithHeader(text) + } +} diff --git a/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/WriteBenchmarksV1.kt b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/WriteBenchmarksV1.kt new file mode 100644 index 0000000..563ec44 --- /dev/null +++ b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/WriteBenchmarksV1.kt @@ -0,0 +1,30 @@ +package com.jsoizo.kotlincsv.bench.v1 + +import com.github.doyaaaaaken.kotlincsv.dsl.csvWriter +import com.jsoizo.kotlincsv.bench.v1.state.FileOutputSinkStateV1 +import com.jsoizo.kotlincsv.bench.v1.state.GeneratedDataStateLargeV1 +import com.jsoizo.kotlincsv.bench.v1.state.GeneratedDataStateV1 +import com.jsoizo.kotlincsv.bench.v1.state.NullOutputSinkStateV1 +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.BenchmarkMode +import org.openjdk.jmh.annotations.Mode +import org.openjdk.jmh.annotations.OutputTimeUnit +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import java.util.concurrent.TimeUnit + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput, Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +open class WriteBenchmarksV1 { + + @Benchmark + fun 
writeAllOutputStream(data: GeneratedDataStateV1, sink: NullOutputSinkStateV1) { + csvWriter().writeAll(data.rows, sink.sink) + } + + @Benchmark + fun writeAllFile(data: GeneratedDataStateLargeV1, sink: FileOutputSinkStateV1) { + csvWriter().writeAll(data.rows, sink.file) + } +} diff --git a/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/FileInputStateV1.kt b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/FileInputStateV1.kt new file mode 100644 index 0000000..5a7af0b --- /dev/null +++ b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/FileInputStateV1.kt @@ -0,0 +1,55 @@ +package com.jsoizo.kotlincsv.bench.v1.state + +import com.jsoizo.kotlincsv.bench.shared.CsvDataGen +import com.jsoizo.kotlincsv.bench.shared.DatasetSpec +import org.openjdk.jmh.annotations.Level +import org.openjdk.jmh.annotations.Param +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.Setup +import org.openjdk.jmh.annotations.State +import org.openjdk.jmh.annotations.TearDown +import java.io.File + +@State(Scope.Benchmark) +open class FileInputStateV1 { + @Param("SMALL", "MEDIUM", "HARD") + lateinit var dataset: String + + lateinit var file: File + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + file = File.createTempFile("bench-v1-${dataset}-", ".csv").apply { + writeBytes(gen.csvBytes) + } + } + + @TearDown(Level.Trial) + fun tearDown() { + file.delete() + } +} + +@State(Scope.Benchmark) +open class FileInputStateLargeV1 { + @Param("SMALL", "MEDIUM", "LARGE", "HARD") + lateinit var dataset: String + + lateinit var file: File + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + file = File.createTempFile("bench-v1-large-${dataset}-", ".csv").apply { + writeBytes(gen.csvBytes) + } + } + + @TearDown(Level.Trial) + fun tearDown() { + file.delete() + } +} diff --git 
a/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/GeneratedDataStateV1.kt b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/GeneratedDataStateV1.kt new file mode 100644 index 0000000..d3c5255 --- /dev/null +++ b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/GeneratedDataStateV1.kt @@ -0,0 +1,45 @@ +package com.jsoizo.kotlincsv.bench.v1.state + +import com.jsoizo.kotlincsv.bench.shared.CsvDataGen +import com.jsoizo.kotlincsv.bench.shared.DatasetSpec +import org.openjdk.jmh.annotations.Level +import org.openjdk.jmh.annotations.Param +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.Setup +import org.openjdk.jmh.annotations.State + +@State(Scope.Benchmark) +open class GeneratedDataStateV1 { + @Param("SMALL", "MEDIUM", "HARD") + lateinit var dataset: String + + lateinit var rows: List> + lateinit var csvText: String + lateinit var csvBytes: ByteArray + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + rows = gen.rows + csvText = gen.csvText + csvBytes = gen.csvBytes + } +} + +@State(Scope.Benchmark) +open class GeneratedDataStateLargeV1 { + @Param("SMALL", "MEDIUM", "LARGE", "HARD") + lateinit var dataset: String + + lateinit var rows: List> + lateinit var csvBytes: ByteArray + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + rows = gen.rows + csvBytes = gen.csvBytes + } +} diff --git a/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/OutputSinkStateV1.kt b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/OutputSinkStateV1.kt new file mode 100644 index 0000000..d59006d --- /dev/null +++ b/benchmark/v1/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v1/state/OutputSinkStateV1.kt @@ -0,0 +1,44 @@ +package com.jsoizo.kotlincsv.bench.v1.state + +import org.openjdk.jmh.annotations.Level +import org.openjdk.jmh.annotations.Param +import 
org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.Setup +import org.openjdk.jmh.annotations.State +import org.openjdk.jmh.annotations.TearDown +import java.io.File +import java.io.OutputStream + +@State(Scope.Benchmark) +open class NullOutputSinkStateV1 { + var sink: OutputStream = OutputStream.nullOutputStream() + private set + + @Setup(Level.Invocation) + fun setup() { + sink = OutputStream.nullOutputStream() + } +} + +@State(Scope.Benchmark) +open class FileOutputSinkStateV1 { + @Param("SMALL", "MEDIUM", "LARGE", "HARD") + lateinit var dataset: String + + lateinit var file: File + + @Setup(Level.Trial) + fun setupTrial() { + file = File.createTempFile("bench-v1-out-${dataset}-", ".csv") + } + + @Setup(Level.Invocation) + fun setupInvocation() { + if (file.exists()) file.delete() + } + + @TearDown(Level.Trial) + fun tearDown() { + file.delete() + } +} diff --git a/benchmark/v2/build.gradle.kts b/benchmark/v2/build.gradle.kts new file mode 100644 index 0000000..6c1650f --- /dev/null +++ b/benchmark/v2/build.gradle.kts @@ -0,0 +1,55 @@ +plugins { + alias(libs.plugins.kotlinJvm) + alias(libs.plugins.jmh) +} + +kotlin { + jvmToolchain(21) +} + +dependencies { + jmh(project(":")) + jmh(project(":benchmark:shared")) + jmh(libs.kotlinx.io.core) +} + +jmh { + warmupIterations.set(5) + iterations.set(5) + fork.set(2) + timeOnIteration.set("10s") + warmup.set("10s") + benchmarkMode.set(listOf("thrpt", "avgt")) + resultFormat.set("JSON") + jvmArgs.set(listOf("-Xms2g", "-Xmx2g")) + duplicateClassesStrategy.set(DuplicatesStrategy.EXCLUDE) + + val include = (project.findProperty("jmh.include") as String?)?.let { listOf(it) } + if (include != null) includes.set(include) + (project.findProperty("jmh.warmupIterations") as String?)?.toInt()?.let { warmupIterations.set(it) } + (project.findProperty("jmh.iterations") as String?)?.toInt()?.let { iterations.set(it) } + (project.findProperty("jmh.fork") as String?)?.toInt()?.let { fork.set(it) } + + when 
(project.findProperty("bench.profile") as String?) { + "large" -> { + warmupIterations.set(2) + iterations.set(3) + fork.set(1) + benchmarkMode.set(listOf("thrpt")) + } + "gcprof" -> { + warmupIterations.set(3) + iterations.set(3) + fork.set(2) + benchmarkMode.set(listOf("thrpt")) + profilers.set(listOf("gc")) + } + "stackprof" -> { + warmupIterations.set(2) + iterations.set(2) + fork.set(1) + benchmarkMode.set(listOf("thrpt")) + profilers.set(listOf("stack")) + } + } +} diff --git a/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/ReadBenchmarksV2.kt b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/ReadBenchmarksV2.kt new file mode 100644 index 0000000..429c163 --- /dev/null +++ b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/ReadBenchmarksV2.kt @@ -0,0 +1,47 @@ +package com.jsoizo.kotlincsv.bench.v2 + +import com.jsoizo.kotlincsv.bench.v2.state.FileInputStateLargeV2 +import com.jsoizo.kotlincsv.bench.v2.state.FileInputStateV2 +import com.jsoizo.kotlincsv.bench.v2.state.GeneratedDataStateV2 +import com.jsoizo.kotlincsv.csvReader +import com.jsoizo.kotlincsv.reader.readAll +import com.jsoizo.kotlincsv.reader.readAllFromFile +import com.jsoizo.kotlincsv.reader.readFromFile +import com.jsoizo.kotlincsv.reader.withHeader +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.BenchmarkMode +import org.openjdk.jmh.annotations.Mode +import org.openjdk.jmh.annotations.OutputTimeUnit +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import java.io.ByteArrayInputStream +import java.util.concurrent.TimeUnit + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput, Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +open class ReadBenchmarksV2 { + + @Benchmark + fun readAllString(data: GeneratedDataStateV2): List> = + csvReader().readAll(data.csvText) + + @Benchmark + fun readAllInputStream(data: GeneratedDataStateV2): List> = + ByteArrayInputStream(data.csvBytes).use { 
csvReader().readAll(it, "UTF-8") } + + @Benchmark + fun readAllFile(state: FileInputStateV2): List> = + csvReader().readAllFromFile(state.file, "UTF-8") + + @Benchmark + fun sequenceIterativeFile(state: FileInputStateLargeV2): Int = + csvReader().readFromFile(state.file, "UTF-8") { it.count() } + + @Benchmark + fun readAllWithHeader(data: GeneratedDataStateV2): List> { + val headers = (0 until data.rows[0].size).joinToString(",") { "col$it" } + val text = headers + "\r\n" + data.csvText + return csvReader().readAll(text).asSequence().withHeader().toList() + } +} diff --git a/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/V2BackendBenchmarks.kt b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/V2BackendBenchmarks.kt new file mode 100644 index 0000000..95fb5db --- /dev/null +++ b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/V2BackendBenchmarks.kt @@ -0,0 +1,50 @@ +package com.jsoizo.kotlincsv.bench.v2 + +import com.jsoizo.kotlincsv.bench.v2.state.FileInputStateLargeV2 +import com.jsoizo.kotlincsv.bench.v2.state.FileInputStateV2 +import com.jsoizo.kotlincsv.bench.v2.state.FileOutputSinkStateV2 +import com.jsoizo.kotlincsv.bench.v2.state.GeneratedDataStateLargeV2 +import com.jsoizo.kotlincsv.csvReader +import com.jsoizo.kotlincsv.csvWriter +import com.jsoizo.kotlincsv.reader.readAllFromFile +import com.jsoizo.kotlincsv.reader.readFromFile +import com.jsoizo.kotlincsv.writer.writeToFile +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.BenchmarkMode +import org.openjdk.jmh.annotations.Mode +import org.openjdk.jmh.annotations.OutputTimeUnit +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import java.util.concurrent.TimeUnit + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput, Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +open class V2BackendBenchmarks { + + @Benchmark + fun readAllFile_javaIo(state: FileInputStateV2): List> = + 
csvReader().readAllFromFile(state.file, "UTF-8") + + @Benchmark + fun readAllFile_kotlinxIo(state: FileInputStateV2): List> = + csvReader().readAllFromFile(state.path) + + @Benchmark + fun sequenceIterativeFile_javaIo(state: FileInputStateLargeV2): Int = + csvReader().readFromFile(state.file, "UTF-8") { it.count() } + + @Benchmark + fun sequenceIterativeFile_kotlinxIo(state: FileInputStateLargeV2): Int = + csvReader().readFromFile(state.path) { it.count() } + + @Benchmark + fun writeAllFile_javaIo(data: GeneratedDataStateLargeV2, sink: FileOutputSinkStateV2) { + csvWriter().writeToFile(data.rows.asSequence(), sink.file, "UTF-8") + } + + @Benchmark + fun writeAllFile_kotlinxIo(data: GeneratedDataStateLargeV2, sink: FileOutputSinkStateV2) { + csvWriter().writeToFile(data.rows.asSequence(), sink.path) + } +} diff --git a/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/WriteBenchmarksV2.kt b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/WriteBenchmarksV2.kt new file mode 100644 index 0000000..6f9a065 --- /dev/null +++ b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/WriteBenchmarksV2.kt @@ -0,0 +1,32 @@ +package com.jsoizo.kotlincsv.bench.v2 + +import com.jsoizo.kotlincsv.bench.v2.state.FileOutputSinkStateV2 +import com.jsoizo.kotlincsv.bench.v2.state.GeneratedDataStateLargeV2 +import com.jsoizo.kotlincsv.bench.v2.state.GeneratedDataStateV2 +import com.jsoizo.kotlincsv.bench.v2.state.NullOutputSinkStateV2 +import com.jsoizo.kotlincsv.csvWriter +import com.jsoizo.kotlincsv.writer.write +import com.jsoizo.kotlincsv.writer.writeToFile +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.BenchmarkMode +import org.openjdk.jmh.annotations.Mode +import org.openjdk.jmh.annotations.OutputTimeUnit +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import java.util.concurrent.TimeUnit + +@State(Scope.Benchmark) +@BenchmarkMode(Mode.Throughput, Mode.AverageTime) 
+@OutputTimeUnit(TimeUnit.MILLISECONDS) +open class WriteBenchmarksV2 { + + @Benchmark + fun writeAllOutputStream(data: GeneratedDataStateV2, sink: NullOutputSinkStateV2) { + csvWriter().write(data.rows.asSequence(), sink.sink, "UTF-8") + } + + @Benchmark + fun writeAllFile(data: GeneratedDataStateLargeV2, sink: FileOutputSinkStateV2) { + csvWriter().writeToFile(data.rows.asSequence(), sink.file, "UTF-8") + } +} diff --git a/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/FileInputStateV2.kt b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/FileInputStateV2.kt new file mode 100644 index 0000000..14b9073 --- /dev/null +++ b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/FileInputStateV2.kt @@ -0,0 +1,60 @@ +package com.jsoizo.kotlincsv.bench.v2.state + +import com.jsoizo.kotlincsv.bench.shared.CsvDataGen +import com.jsoizo.kotlincsv.bench.shared.DatasetSpec +import kotlinx.io.files.Path +import org.openjdk.jmh.annotations.Level +import org.openjdk.jmh.annotations.Param +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.Setup +import org.openjdk.jmh.annotations.State +import org.openjdk.jmh.annotations.TearDown +import java.io.File + +@State(Scope.Benchmark) +open class FileInputStateV2 { + @Param("SMALL", "MEDIUM", "HARD") + lateinit var dataset: String + + lateinit var file: File + lateinit var path: Path + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + file = File.createTempFile("bench-v2-${dataset}-", ".csv").apply { + writeBytes(gen.csvBytes) + } + path = Path(file.absolutePath) + } + + @TearDown(Level.Trial) + fun tearDown() { + file.delete() + } +} + +@State(Scope.Benchmark) +open class FileInputStateLargeV2 { + @Param("SMALL", "MEDIUM", "LARGE", "HARD") + lateinit var dataset: String + + lateinit var file: File + lateinit var path: Path + + @Setup(Level.Trial) + fun setup() { + val spec = 
DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + file = File.createTempFile("bench-v2-large-${dataset}-", ".csv").apply { + writeBytes(gen.csvBytes) + } + path = Path(file.absolutePath) + } + + @TearDown(Level.Trial) + fun tearDown() { + file.delete() + } +} diff --git a/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/GeneratedDataStateV2.kt b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/GeneratedDataStateV2.kt new file mode 100644 index 0000000..71a3e77 --- /dev/null +++ b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/GeneratedDataStateV2.kt @@ -0,0 +1,45 @@ +package com.jsoizo.kotlincsv.bench.v2.state + +import com.jsoizo.kotlincsv.bench.shared.CsvDataGen +import com.jsoizo.kotlincsv.bench.shared.DatasetSpec +import org.openjdk.jmh.annotations.Level +import org.openjdk.jmh.annotations.Param +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.Setup +import org.openjdk.jmh.annotations.State + +@State(Scope.Benchmark) +open class GeneratedDataStateV2 { + @Param("SMALL", "MEDIUM", "HARD") + lateinit var dataset: String + + lateinit var rows: List> + lateinit var csvText: String + lateinit var csvBytes: ByteArray + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + rows = gen.rows + csvText = gen.csvText + csvBytes = gen.csvBytes + } +} + +@State(Scope.Benchmark) +open class GeneratedDataStateLargeV2 { + @Param("SMALL", "MEDIUM", "LARGE", "HARD") + lateinit var dataset: String + + lateinit var rows: List> + lateinit var csvBytes: ByteArray + + @Setup(Level.Trial) + fun setup() { + val spec = DatasetSpec.valueOf(dataset) + val gen = CsvDataGen.generate(spec) + rows = gen.rows + csvBytes = gen.csvBytes + } +} diff --git a/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/OutputSinkStateV2.kt b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/OutputSinkStateV2.kt new file mode 
100644 index 0000000..beb66a0 --- /dev/null +++ b/benchmark/v2/src/jmh/kotlin/com/jsoizo/kotlincsv/bench/v2/state/OutputSinkStateV2.kt @@ -0,0 +1,47 @@ +package com.jsoizo.kotlincsv.bench.v2.state + +import kotlinx.io.files.Path +import org.openjdk.jmh.annotations.Level +import org.openjdk.jmh.annotations.Param +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.Setup +import org.openjdk.jmh.annotations.State +import org.openjdk.jmh.annotations.TearDown +import java.io.File +import java.io.OutputStream + +@State(Scope.Benchmark) +open class NullOutputSinkStateV2 { + var sink: OutputStream = OutputStream.nullOutputStream() + private set + + @Setup(Level.Invocation) + fun setup() { + sink = OutputStream.nullOutputStream() + } +} + +@State(Scope.Benchmark) +open class FileOutputSinkStateV2 { + @Param("SMALL", "MEDIUM", "LARGE", "HARD") + lateinit var dataset: String + + lateinit var file: File + lateinit var path: Path + + @Setup(Level.Trial) + fun setupTrial() { + file = File.createTempFile("bench-v2-out-${dataset}-", ".csv") + path = Path(file.absolutePath) + } + + @Setup(Level.Invocation) + fun setupInvocation() { + if (file.exists()) file.delete() + } + + @TearDown(Level.Trial) + fun tearDown() { + file.delete() + } +} diff --git a/build.gradle.kts b/build.gradle.kts index 79a22d2..eec82ce 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -7,9 +7,11 @@ import org.jetbrains.kotlin.gradle.tasks.KotlinJvmCompile plugins { alias(libs.plugins.kotlinMultiplatform) + alias(libs.plugins.kotlinJvm) apply false alias(libs.plugins.dokka) alias(libs.plugins.kover) alias(libs.plugins.mavenPublish) + alias(libs.plugins.jmh) apply false } group = "com.jsoizo" diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 1f4e47b..f29e3fd 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -6,6 +6,9 @@ maven-publish = "0.36.0" kover = "0.9.8" dokka = "2.2.0" kotest = "6.1.3" +jmh = "1.37" +jmhPlugin = "0.7.2" 
+kotlincsvV1 = "1.10.0" [libraries] kotlin-test = { module = "org.jetbrains.kotlin:kotlin-test", version.ref = "kotlin" } @@ -16,12 +19,17 @@ kotlinx-io-core = { module = "org.jetbrains.kotlinx:kotlinx-io-core", version.re kotest-runner-junit5 = { module = "io.kotest:kotest-runner-junit5", version.ref = "kotest" } kotest-assertions-core = { module = "io.kotest:kotest-assertions-core", version.ref = "kotest" } kotest-property = { module = "io.kotest:kotest-property", version.ref = "kotest" } +jmh-core = { module = "org.openjdk.jmh:jmh-core", version.ref = "jmh" } +jmh-generator-annprocess = { module = "org.openjdk.jmh:jmh-generator-annprocess", version.ref = "jmh" } +kotlincsv-v1-jvm = { module = "com.jsoizo:kotlin-csv-jvm", version.ref = "kotlincsvV1" } [bundles] kotest = ["kotest-runner-junit5", "kotest-assertions-core", "kotest-property"] [plugins] kotlinMultiplatform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" } +kotlinJvm = { id = "org.jetbrains.kotlin.jvm", version.ref = "kotlin" } kover = { id = "org.jetbrains.kotlinx.kover", version.ref = "kover" } dokka = { id = "org.jetbrains.dokka", version.ref = "dokka" } mavenPublish = { id = "com.vanniktech.maven.publish", version.ref = "maven-publish" } +jmh = { id = "me.champeau.jmh", version.ref = "jmhPlugin" } diff --git a/settings.gradle.kts b/settings.gradle.kts index 4a83eb2..52d6723 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -12,3 +12,5 @@ dependencyResolutionManagement { mavenCentral() } } + +include(":benchmark:shared", ":benchmark:v1", ":benchmark:v2", ":benchmark:parity") From 6933d2f6f50070189ef8a994da2f4f7579588fc7 Mon Sep 17 00:00:00 2001 From: Jun Sekine Date: Sun, 17 May 2026 02:46:54 +0900 Subject: [PATCH 2/4] Add 'quick' JMH bench profile for short-duration first-cut runs Sets warmup=3, iter=3, fork=1, time=5s and restricts dataset @Param to SMALL and HARD via JMH '-p dataset' equivalent. 
Intended for the first issue #172 comment so readers see numbers before the full primary run finishes. --- benchmark/v1/build.gradle.kts | 9 +++++++++ benchmark/v2/build.gradle.kts | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/benchmark/v1/build.gradle.kts b/benchmark/v1/build.gradle.kts index 6436416..7abfa11 100644 --- a/benchmark/v1/build.gradle.kts +++ b/benchmark/v1/build.gradle.kts @@ -30,6 +30,15 @@ jmh { (project.findProperty("jmh.fork") as String?)?.toInt()?.let { fork.set(it) } when (project.findProperty("bench.profile") as String?) { + "quick" -> { + warmupIterations.set(3) + iterations.set(3) + fork.set(1) + timeOnIteration.set("5s") + warmup.set("5s") + benchmarkMode.set(listOf("thrpt", "avgt")) + benchmarkParameters.put("dataset", listOf("SMALL", "HARD")) + } "large" -> { warmupIterations.set(2) iterations.set(3) diff --git a/benchmark/v2/build.gradle.kts b/benchmark/v2/build.gradle.kts index 6c1650f..8e402f5 100644 --- a/benchmark/v2/build.gradle.kts +++ b/benchmark/v2/build.gradle.kts @@ -31,6 +31,15 @@ jmh { (project.findProperty("jmh.fork") as String?)?.toInt()?.let { fork.set(it) } when (project.findProperty("bench.profile") as String?) { + "quick" -> { + warmupIterations.set(3) + iterations.set(3) + fork.set(1) + timeOnIteration.set("5s") + warmup.set("5s") + benchmarkMode.set(listOf("thrpt", "avgt")) + benchmarkParameters.put("dataset", listOf("SMALL", "HARD")) + } "large" -> { warmupIterations.set(2) iterations.set(3) From 35bdc8c41fc9cdef777eedc55f67a836100d2cb9 Mon Sep 17 00:00:00 2001 From: Jun Sekine Date: Sun, 17 May 2026 02:48:38 +0900 Subject: [PATCH 3/4] Fix benchmarkParameters DSL: wrap in ListProperty for jmh plugin 0.7.2 The MapProperty<String, ListProperty<String>> setter does not accept a plain List<String>. Wrap the value in objects.listProperty(...).set(...). 
--- benchmark/v1/build.gradle.kts | 5 ++++- benchmark/v2/build.gradle.kts | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/benchmark/v1/build.gradle.kts b/benchmark/v1/build.gradle.kts index 7abfa11..2be3b7d 100644 --- a/benchmark/v1/build.gradle.kts +++ b/benchmark/v1/build.gradle.kts @@ -37,7 +37,10 @@ jmh { timeOnIteration.set("5s") warmup.set("5s") benchmarkMode.set(listOf("thrpt", "avgt")) - benchmarkParameters.put("dataset", listOf("SMALL", "HARD")) + benchmarkParameters.put( + "dataset", + objects.listProperty(String::class.java).apply { set(listOf("SMALL", "HARD")) }, + ) } "large" -> { warmupIterations.set(2) diff --git a/benchmark/v2/build.gradle.kts b/benchmark/v2/build.gradle.kts index 8e402f5..a0f08da 100644 --- a/benchmark/v2/build.gradle.kts +++ b/benchmark/v2/build.gradle.kts @@ -38,7 +38,10 @@ jmh { timeOnIteration.set("5s") warmup.set("5s") benchmarkMode.set(listOf("thrpt", "avgt")) - benchmarkParameters.put("dataset", listOf("SMALL", "HARD")) + benchmarkParameters.put( + "dataset", + objects.listProperty(String::class.java).apply { set(listOf("SMALL", "HARD")) }, + ) } "large" -> { warmupIterations.set(2) From 7fe79b4222222b63ad31b91c6269d67c13526a35 Mon Sep 17 00:00:00 2001 From: Jun Sekine Date: Sun, 17 May 2026 22:09:34 +0900 Subject: [PATCH 4/4] Add 'primary' JMH bench profile (warmup=5, iter=5, fork=2, 10s) Restricts dataset @Param to SMALL/MEDIUM/HARD; the LARGE dataset is covered by the separate 'large' profile per the methodology in #172. --- benchmark/v1/build.gradle.kts | 12 ++++++++++++ benchmark/v2/build.gradle.kts | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/benchmark/v1/build.gradle.kts b/benchmark/v1/build.gradle.kts index 2be3b7d..0ba657f 100644 --- a/benchmark/v1/build.gradle.kts +++ b/benchmark/v1/build.gradle.kts @@ -30,6 +30,18 @@ jmh { (project.findProperty("jmh.fork") as String?)?.toInt()?.let { fork.set(it) } when (project.findProperty("bench.profile") as String?) 
{ + "primary" -> { + warmupIterations.set(5) + iterations.set(5) + fork.set(2) + timeOnIteration.set("10s") + warmup.set("10s") + benchmarkMode.set(listOf("thrpt", "avgt")) + benchmarkParameters.put( + "dataset", + objects.listProperty(String::class.java).apply { set(listOf("SMALL", "MEDIUM", "HARD")) }, + ) + } "quick" -> { warmupIterations.set(3) iterations.set(3) diff --git a/benchmark/v2/build.gradle.kts b/benchmark/v2/build.gradle.kts index a0f08da..72804b5 100644 --- a/benchmark/v2/build.gradle.kts +++ b/benchmark/v2/build.gradle.kts @@ -31,6 +31,18 @@ jmh { (project.findProperty("jmh.fork") as String?)?.toInt()?.let { fork.set(it) } when (project.findProperty("bench.profile") as String?) { + "primary" -> { + warmupIterations.set(5) + iterations.set(5) + fork.set(2) + timeOnIteration.set("10s") + warmup.set("10s") + benchmarkMode.set(listOf("thrpt", "avgt")) + benchmarkParameters.put( + "dataset", + objects.listProperty(String::class.java).apply { set(listOf("SMALL", "MEDIUM", "HARD")) }, + ) + } "quick" -> { warmupIterations.set(3) iterations.set(3)