From c59f4ce241db8fdf2020d4a8b7aaa89e1a1eaf6a Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Fri, 15 Jul 2016 22:24:56 -0300 Subject: [PATCH 1/7] Cleanup and optimize DB schema The database schema was lacking many integrity checks and indexes. Correct it by first applying a migration that removes all old/stale data, then creating those indexes. The driving reason for this is the very slow performance of processing (specially aggregation) on the new mezuro.org servers. It will hopefully remove (or at least heavily improve) the superlinear slowdown when the number of metrics rises, as observed in #207. --- CHANGELOG.rdoc | 1 + .../20160720185407_clean_inconsistencies.rb | 73 +++++++++++++++++++ ...20185408_add_indexes_to_kalibro_modules.rb | 6 ++ ...720185409_add_indexes_to_module_results.rb | 6 ++ ...720185410_add_indexes_to_metric_results.rb | 11 +++ ...160720185411_add_indexes_to_processings.rb | 6 ++ ...0720185412_add_indexes_to_process_times.rb | 5 ++ ...60720185413_add_indexes_to_repositories.rb | 5 ++ db/schema.rb | 16 +++- 9 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20160720185407_clean_inconsistencies.rb create mode 100644 db/migrate/20160720185408_add_indexes_to_kalibro_modules.rb create mode 100644 db/migrate/20160720185409_add_indexes_to_module_results.rb create mode 100644 db/migrate/20160720185410_add_indexes_to_metric_results.rb create mode 100644 db/migrate/20160720185411_add_indexes_to_processings.rb create mode 100644 db/migrate/20160720185412_add_indexes_to_process_times.rb create mode 100644 db/migrate/20160720185413_add_indexes_to_repositories.rb diff --git a/CHANGELOG.rdoc b/CHANGELOG.rdoc index fbbe272..b06182a 100644 --- a/CHANGELOG.rdoc +++ b/CHANGELOG.rdoc @@ -4,6 +4,7 @@ KalibroProcessor is the processing web service for Mezuro. == Unreleased +* Optimize database structure by adding foreign keys and indexes where needed * Insert in one query all aggregated MetricResults * Aggregate values by tree levels * Enable ModuleResult tree walking by level diff --git a/db/migrate/20160720185407_clean_inconsistencies.rb b/db/migrate/20160720185407_clean_inconsistencies.rb new file mode 100644 index 0000000..8732cf8 --- /dev/null +++ b/db/migrate/20160720185407_clean_inconsistencies.rb @@ -0,0 +1,73 @@ +class CleanInconsistencies < ActiveRecord::Migration + def self.up + # Unset project reference for repositories with non-existing projects + execute <<-SQL + UPDATE repositories AS r + SET project_id = NULL + WHERE project_id = 0 OR NOT EXISTS ( + SELECT 1 FROM projects AS p WHERE p.id = r.project_id + ) + SQL + + # Delete processings with non-existing repositories + execute <<-SQL + DELETE FROM processings AS p + WHERE NOT EXISTS( + SELECT 1 FROM repositories AS r WHERE r.id = p.repository_id + ) + SQL + + # Delete process times with non-existing processings + execute <<-SQL + DELETE FROM process_times AS t + WHERE NOT EXISTS ( + SELECT 1 FROM processings AS p WHERE p.id = t.processing_id + ) + SQL + + # Delete module results with non-existing processings + execute <<-SQL + DELETE FROM module_results AS m + WHERE NOT EXISTS ( + SELECT 1 FROM processings AS p WHERE p.id = m.processing_id + ) + SQL + + # Delete kalibro modules with non-existing module results + execute <<-SQL + DELETE FROM kalibro_modules AS k + WHERE NOT EXISTS ( + SELECT 1 FROM module_results AS m WHERE m.id = k.module_result_id + ) + SQL + + # Delete metric results with non-existing module results + execute <<-SQL + DELETE FROM metric_results AS met + WHERE NOT EXISTS ( + SELECT 1 FROM module_results AS mod WHERE mod.id = met.module_result_id + ) + SQL + + # Delete duplicate metric_results. Group them by (module_result, metric_configuration), + # then delete all but the one with the highest ID + # The double wrapping on the inner query is necessary because window functions + # cannot be used in WHERE in PostgreSQL. + execute <<-SQL + DELETE FROM metric_results + WHERE id IN ( + SELECT t.id FROM ( + SELECT id, ROW_NUMBER() OVER (PARTITION BY module_result_id, metric_configuration_id, "type" + ORDER BY id DESC) AS rnum + FROM metric_results + WHERE "type" = 'TreeMetricResult' + ) AS t + WHERE t.rnum > 1 + ) + SQL + end + + def self.down + raise ActiveRecord::IrreversibleMigration + end +end diff --git a/db/migrate/20160720185408_add_indexes_to_kalibro_modules.rb b/db/migrate/20160720185408_add_indexes_to_kalibro_modules.rb new file mode 100644 index 0000000..718ddd4 --- /dev/null +++ b/db/migrate/20160720185408_add_indexes_to_kalibro_modules.rb @@ -0,0 +1,6 @@ +class AddIndexesToKalibroModules < ActiveRecord::Migration + def change + add_foreign_key :kalibro_modules, :module_results, on_delete: :cascade + add_index :kalibro_modules, [:long_name, :granularity] + end +end diff --git a/db/migrate/20160720185409_add_indexes_to_module_results.rb b/db/migrate/20160720185409_add_indexes_to_module_results.rb new file mode 100644 index 0000000..2726718 --- /dev/null +++ b/db/migrate/20160720185409_add_indexes_to_module_results.rb @@ -0,0 +1,6 @@ +class AddIndexesToModuleResults < ActiveRecord::Migration + def change + add_foreign_key :module_results, :module_results, column: 'parent_id' + add_foreign_key :module_results, :processings, on_delete: :cascade + end +end diff --git a/db/migrate/20160720185410_add_indexes_to_metric_results.rb b/db/migrate/20160720185410_add_indexes_to_metric_results.rb new file mode 100644 index 0000000..cb6bc59 --- /dev/null +++ b/db/migrate/20160720185410_add_indexes_to_metric_results.rb @@ -0,0 +1,11 @@ +class AddIndexesToMetricResults < ActiveRecord::Migration + def change + add_foreign_key :metric_results, :module_results, on_delete: :cascade + add_index :metric_results, :type + add_index :metric_results, :module_result_id + add_index :metric_results, :metric_configuration_id + add_index :metric_results, [:module_result_id, :metric_configuration_id], + unique: true, where: "type = 'TreeMetricResult'", + name: 'metric_results_module_res_metric_cfg_uniq_idx' + end +end diff --git a/db/migrate/20160720185411_add_indexes_to_processings.rb b/db/migrate/20160720185411_add_indexes_to_processings.rb new file mode 100644 index 0000000..a0d1ebc --- /dev/null +++ b/db/migrate/20160720185411_add_indexes_to_processings.rb @@ -0,0 +1,6 @@ +class AddIndexesToProcessings < ActiveRecord::Migration + def change + add_foreign_key :processings, :repositories + add_foreign_key :processings, :module_results, column: 'root_module_result_id' + end +end diff --git a/db/migrate/20160720185412_add_indexes_to_process_times.rb b/db/migrate/20160720185412_add_indexes_to_process_times.rb new file mode 100644 index 0000000..1639f4b --- /dev/null +++ b/db/migrate/20160720185412_add_indexes_to_process_times.rb @@ -0,0 +1,5 @@ +class AddIndexesToProcessTimes < ActiveRecord::Migration + def change + add_foreign_key :process_times, :processings, on_delete: :cascade + end +end diff --git a/db/migrate/20160720185413_add_indexes_to_repositories.rb b/db/migrate/20160720185413_add_indexes_to_repositories.rb new file mode 100644 index 0000000..dcf6b8d --- /dev/null +++ b/db/migrate/20160720185413_add_indexes_to_repositories.rb @@ -0,0 +1,5 @@ +class AddIndexesToRepositories < ActiveRecord::Migration + def change + add_foreign_key :repositories, :projects + end +end diff --git a/db/schema.rb b/db/schema.rb index f576490..d64edbe 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20151002172231) do +ActiveRecord::Schema.define(version: 20160720185413) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -40,6 +40,8 @@ t.integer "module_result_id" end + add_index "kalibro_modules", ["long_name", "granularity"], name: "index_kalibro_modules_on_long_name_and_granularity", using: :btree + create_table "metric_results", force: :cascade do |t| t.integer "module_result_id" t.integer "metric_configuration_id" @@ -52,7 +54,11 @@ t.integer "related_hotspot_metric_results_id" end + add_index "metric_results", ["metric_configuration_id"], name: "index_metric_results_on_metric_configuration_id", using: :btree + add_index "metric_results", ["module_result_id", "metric_configuration_id"], name: "metric_results_module_res_metric_cfg_uniq_idx", unique: true, where: "((type)::text = 'TreeMetricResult'::text)", using: :btree + add_index "metric_results", ["module_result_id"], name: "index_metric_results_on_module_result_id", using: :btree add_index "metric_results", ["related_hotspot_metric_results_id"], name: "index_metric_results_on_related_hotspot_metric_results_id", using: :btree + add_index "metric_results", ["type"], name: "index_metric_results_on_type", using: :btree create_table "module_results", force: :cascade do |t| t.float "grade" @@ -106,5 +112,13 @@ t.string "branch", default: "master", null: false end + add_foreign_key "kalibro_modules", "module_results", on_delete: :cascade + add_foreign_key "metric_results", "module_results", on_delete: :cascade add_foreign_key "metric_results", "related_hotspot_metric_results", column: "related_hotspot_metric_results_id" + add_foreign_key "module_results", "module_results", column: "parent_id" + add_foreign_key "module_results", "processings", on_delete: :cascade + add_foreign_key "process_times", "processings", on_delete: :cascade + add_foreign_key "processings", "module_results", column: "root_module_result_id" + add_foreign_key "processings", "repositories" + add_foreign_key "repositories", "projects" end From dbca2694a65454fdde93c119bf650e21a830488b Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Sat, 16 Jul 2016 09:54:19 -0300 Subject: [PATCH 2/7] Refactor runner acceptance steps to avoid hardcoded IDs Hardcoded IDs only worked because we had no referential intergrity constraints. Remove them and clean up the whole model creation code. --- features/metric_result/module_result.feature | 5 +- .../metric_result_history_of.feature | 2 +- features/runner.feature | 2 +- features/step_definitions/runner_steps.rb | 238 +++++++++++------- 4 files changed, 152 insertions(+), 95 deletions(-) diff --git a/features/metric_result/module_result.feature b/features/metric_result/module_result.feature index 235faca..0dc4b14 100644 --- a/features/metric_result/module_result.feature +++ b/features/metric_result/module_result.feature @@ -5,8 +5,7 @@ Feature: ModuleResult retrieval @clear_repository @kalibro_configuration_restart Scenario: With a valid MetricResult id - Given I have sample readings - And I have a sample configuration with the Flay hotspot metric + Given I have a sample configuration with the Flay hotspot metric And I have the kalibro processor ruby repository with revision "v0.11.0" And I have a processing within the sample repository And I run for the given repository @@ -15,5 +14,5 @@ Feature: ModuleResult retrieval Then I should get the given ModuleResult json Scenario: With an invalid MetricResult id - When I request for the ModuleResult of the MetricResult with id "42" + When I request for the ModuleResult of the MetricResult with id "0" Then I should get an error response diff --git a/features/repository/metric_result_history_of.feature b/features/repository/metric_result_history_of.feature index 6260830..2790e96 100644 --- a/features/repository/metric_result_history_of.feature +++ b/features/repository/metric_result_history_of.feature @@ -7,7 +7,7 @@ Feature: Metric Result History Of Scenario: After processing an existing repository with a kalibro configuration Given I have sample readings And I have a sample kalibro configuration with native metrics - And I have a sample repository within the sample project + And I have a sample repository And I have a processing within the sample repository And I run for the given repository When I get the history for the first metric result of the root diff --git a/features/runner.feature b/features/runner.feature index 60412c1..d8567bc 100644 --- a/features/runner.feature +++ b/features/runner.feature @@ -87,7 +87,7 @@ Feature: Runner run Given I have sample readings And I have a sample configuration with the Cyclomatic python native metric And I add the "Maintainability" native metric to the sample configuration - And I have a sample python repository within the sample project + And I have a sample python repository And I have a processing within the sample repository When I run for the given repository Then the repository code_directory should exist diff --git a/features/step_definitions/runner_steps.rb b/features/step_definitions/runner_steps.rb index 39d9806..bf4b759 100644 --- a/features/step_definitions/runner_steps.rb +++ b/features/step_definitions/runner_steps.rb @@ -1,159 +1,217 @@ Given(/^I have a sample kalibro configuration with native metrics$/) do - @kalibro_configuration = FactoryGirl.create(:kalibro_configuration, id: nil) + @configuration = FactoryGirl.create(:kalibro_configuration) + metric_configuration = FactoryGirl.create(:metric_configuration, - {id: nil, - metric: FactoryGirl.build(:loc_metric), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @kalibro_configuration.id}) - range = FactoryGirl.build(:range, {id: nil, reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: metric_configuration.id}) - range.save + metric: FactoryGirl.build(:loc_metric), + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + end: 'INF', + metric_configuration_id: metric_configuration.id) + compound_metric_configuration = FactoryGirl.create(:compound_metric_configuration, - metric: FactoryGirl.build(:compound_metric, - script: "return loc() * 2;"), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @kalibro_configuration.id) - compound_range = FactoryGirl.build(:range, {reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: compound_metric_configuration.id}) - compound_range.save + metric: FactoryGirl.build(:compound_metric, script: "return loc() * 2;"), + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + compound_range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + end: 'INF', + metric_configuration_id: compound_metric_configuration.id) end Given(/^I have a sample configuration with the (\w+) native metric$/) do |metric| metric_configuration_factory = (metric + "_metric_configuration").downcase metric_factory = (metric + "_metric").downcase - @configuration = FactoryGirl.create(:kalibro_configuration, id: nil) + + @configuration = FactoryGirl.create(:kalibro_configuration) + metric_configuration = FactoryGirl.create(metric_configuration_factory.to_sym, - {id: 4, - metric: FactoryGirl.build(metric_factory.to_sym), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @configuration.id}) - range = FactoryGirl.build(:range, {id: nil, reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: metric_configuration.id}) - range.save - compound_metric_configuration = FactoryGirl.create(metric_configuration_factory.sub("_", "_compound_").to_sym, - id: 5, - metric: FactoryGirl.build(("compound_" + metric_factory).to_sym), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @configuration.id) - compound_range = FactoryGirl.build(:range, {id: nil, reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: compound_metric_configuration.id}) - compound_range.save + metric: FactoryGirl.build(metric_factory.to_sym), + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', end: 'INF', + metric_configuration_id: metric_configuration.id) + + compound_metric_configuration_factory = metric_configuration_factory.sub("_", "_compound_") + compound_metric_factory = "compound_" + metric_factory + + compound_metric_configuration = FactoryGirl.create(compound_metric_configuration_factory.to_sym, + metric: FactoryGirl.build(compound_metric_factory.to_sym), + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + compound_range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + end: 'INF', + metric_configuration_id: compound_metric_configuration.id) end Given(/^I have a sample configuration with the (\w+) hotspot metric$/) do |metric| metric_configuration_factory = (metric + "_metric_configuration").downcase metric_factory = (metric + "_metric").downcase - @configuration = FactoryGirl.create(:kalibro_configuration, id: nil) - metric = FactoryGirl.build(metric_factory.to_sym) + + @configuration = FactoryGirl.create(:kalibro_configuration) + @metric_configuration = FactoryGirl.create(metric_configuration_factory.to_sym, - metric: metric, - kalibro_configuration_id: @configuration.id) + metric: FactoryGirl.build(metric_factory.to_sym), + kalibro_configuration_id: @configuration.id) end +Given(/^I have a sample configuration with the (\w+) python native metric$/) do |metric| + metric_configuration_factory = (metric + "_metric_configuration").downcase + metric_factory = (metric + "_metric").downcase + + @configuration = FactoryGirl.create(:kalibro_configuration) + + metric_configuration = FactoryGirl.create(metric_configuration_factory.to_sym, + metric: FactoryGirl.build(metric_factory.to_sym), + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + :end => 'INF', + metric_configuration_id: metric_configuration.id) +end + + Given(/^I have a sample repository$/) do - @repository = FactoryGirl.create(:sbking_repository, kalibro_configuration: @kalibro_configuration) + @repository = FactoryGirl.create(:sbking_repository, kalibro_configuration: @configuration) end Given(/^I have a sample ruby repository$/) do @repository = FactoryGirl.create(:ruby_repository, kalibro_configuration: @configuration) end -Given(/^I have a sample ruby repository within the sample project$/) do - @repository = FactoryGirl.create(:ruby_repository, :with_project_id, kalibro_configuration: @configuration) -end - Given(/^I have a sample php repository$/) do - @repository = FactoryGirl.create(:php_repository, :with_project_id, kalibro_configuration: @configuration) + @repository = FactoryGirl.create(:php_repository, kalibro_configuration: @configuration) end -Given(/^I have a sample configuration with the (\w+) python native metric$/) do |metric| - metric_configuration_factory = (metric + "_metric_configuration").downcase - metric_factory = (metric + "_metric").downcase - @configuration = FactoryGirl.create(:kalibro_configuration, id: nil) - metric_configuration = FactoryGirl.create(metric_configuration_factory.to_sym, - {id: 4, - metric: FactoryGirl.build(metric_factory.to_sym), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @configuration.id}) - range = FactoryGirl.build(:range, {id: nil, reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: metric_configuration.id}) - range.save -end - -Given(/^I have a sample python repository within the sample project$/) do +Given(/^I have a sample python repository$/) do @repository = FactoryGirl.create(:python_repository, kalibro_configuration: @configuration) end -Given(/^I have a sample repository within the sample project$/) do - @repository = FactoryGirl.create(:sbking_repository, :with_project_id, kalibro_configuration: @kalibro_configuration) +Given(/^I assign the repository to the project$/) do + expect(@repository).to_not be_nil + expect(@project).to_not be_nil + repository.update_attributes(project: @project) end Given(/^I have a processing within the sample repository$/) do - @processing = FactoryGirl.create(:processing, repository: @repository, state: "PREPARING") + @processing = Processing.create!(repository: @repository, state: "PREPARING") end Given(/^I have a compound metric with an invalid script$/) do - invalid_compound_metric_configuration = FactoryGirl.create(:compound_metric_configuration, - metric: FactoryGirl.build(:compound_metric, - script: "rtrnaqdfwqefwqr213r2145211234ed a = b=2", - code: "ACM"), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @kalibro_configuration.id) - compound_range = FactoryGirl.build(:range, {reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: invalid_compound_metric_configuration.id}) - compound_range.save + compound_metric = FactoryGirl.build(:compound_metric, + script: "rtrnaqdfwqefwqr213r2145211234ed a = b=2", + code: "ACM") + + compound_metric_configuration = FactoryGirl.create(:compound_metric_configuration, + metric: compound_metric, + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + compound_range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + :end => 'INF', + metric_configuration_id: compound_metric_configuration.id) end Given(/^I have sample readings$/) do @reading_group = FactoryGirl.create(:reading_group) - @reading = FactoryGirl.create(:reading, {reading_group_id: @reading_group.id}) + @reading = FactoryGirl.create(:reading, + reading_group_id: @reading_group.id) end Given(/^I have a sample kalibro configuration$/) do - @kalibro_configuration = FactoryGirl.create(:kalibro_configuration, name: "teste") + @configuration = FactoryGirl.create(:kalibro_configuration, + name: "teste") end Given(/^I have a range for this metric configuration$/) do - range = FactoryGirl.build(:range, {reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: @metric_configuration.id}) - range.save + range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', :end => 'INF', + metric_configuration_id: @metric_configuration.id) end Given(/^I add the "(.*?)" analizo metric with scope "(.*?)" and code "(.*?)"$/) do |name, scope, code| + metric = FactoryGirl.build(:native_metric, :analizo, + name: name, + scope: scope, + code: code) + @metric_configuration = FactoryGirl.create(:metric_configuration, - {metric: FactoryGirl.build(:native_metric, :analizo, name: name, scope: scope, code: code), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @kalibro_configuration.id}) + metric: metric, + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) end Given(/^I have two compound metrics with script "(.*?)" and "(.*?)"$/) do |script1, script2| + compound_metric = FactoryGirl.build(:compound_metric, + script: script1, + code: "compound_metric") + @compound_metric_configuration = FactoryGirl.create(:compound_metric_configuration, - metric: FactoryGirl.build(:compound_metric, script: script1, - code: "compound_metric"), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @kalibro_configuration.id) + metric: compound_metric, + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + other_compound_metric = FactoryGirl.build(:compound_metric, + script: script2, + code: "ACM") + @other_compound_metric_configuration = FactoryGirl.create(:compound_metric_configuration, - metric: FactoryGirl.build(:compound_metric, script: script2, - code: "ACM"), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @kalibro_configuration.id) - compound_range = FactoryGirl.build(:range, {reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: @compound_metric_configuration.id}) - compound_range.save - other_compound_range = FactoryGirl.build(:range, {reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: @other_compound_metric_configuration.id}) - other_compound_range.save + metric: other_compound_metric, + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + compound_range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', :end => 'INF', + metric_configuration_id: @compound_metric_configuration.id) + + other_compound_range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + :end => 'INF', + metric_configuration_id: @other_compound_metric_configuration.id) end Given(/^I add the "(.*?)" native metric to the sample configuration$/) do |metric| metric_configuration_factory = (metric + "_metric_configuration").downcase metric_factory = (metric + "_metric").downcase + metric_configuration = FactoryGirl.create(metric_configuration_factory.to_sym, - {id: 4, - metric: FactoryGirl.build(metric_factory.to_sym), - reading_group_id: @reading_group.id, - kalibro_configuration_id: @configuration.id}) - range = FactoryGirl.build(:range, {id: nil, reading_id: @reading.id, beginning: '-INF', :end => 'INF', metric_configuration_id: metric_configuration.id}) - range.save + metric: FactoryGirl.build(metric_factory.to_sym), + reading_group_id: @reading_group.id, + kalibro_configuration_id: @configuration.id) + + range = FactoryGirl.create(:range, + reading_id: @reading.id, + beginning: '-INF', + :end => 'INF', + metric_configuration_id: metric_configuration.id) end Given(/^I use the hotspot metric to create a compound metric$/) do code = @metric_configuration.metric.code - @compound = FactoryGirl.create(:compound_metric_configuration, - metric: FactoryGirl.build(:compound_metric, code: "cm", script: "return #{code}() * 2;"), - kalibro_configuration_id: @configuration.id) + metric = FactoryGirl.build(:compound_metric, code: "cm", script: "return #{code}() * 2;") + @compound = FactoryGirl.create(:compound_metric_configuration, + metric: metric, + kalibro_configuration_id: @configuration.id) end When(/^I run for the given repository$/) do From 1e1cbb2800838ebbbf8905773b0a8f19d33ca042 Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Sat, 16 Jul 2016 09:56:05 -0300 Subject: [PATCH 3/7] Update database_cleaner setup to use truncation when needed The transaction strategy was only working because we didn't have referential integrity checks. The job workers are not guaranteed to stay in the same the same transaction as the tests - the PHPMD tests, for example, spawned a new thread - and therefore, we must use truncation instead so all the models become visible to the workers in time. --- features/runner.feature | 2 +- features/support/env.rb | 50 +++++++++++++++++------------------------ 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/features/runner.feature b/features/runner.feature index d8567bc..23066ec 100644 --- a/features/runner.feature +++ b/features/runner.feature @@ -107,7 +107,7 @@ Feature: Runner run And the processing retrieved should have a Root ModuleResult And the Root ModuleResult retrieved should have a list of MetricResults - @clear_repository @kalibro_configuration_restart @docker + @clear_repository @kalibro_configuration_restart @docker @no_transaction Scenario: An existing php repository with a configuration with PHPMD (Hotspot Metrics) Given I have a sample configuration with the PHPMD hotspot metric And I have a sample php repository diff --git a/features/support/env.rb b/features/support/env.rb index da7f0f4..be86804 100644 --- a/features/support/env.rb +++ b/features/support/env.rb @@ -18,10 +18,9 @@ # instead of editing this one. Cucumber will automatically load all features/**/*.rb # files. -# Requiring since the Gemfile is not requiring it -require 'database_cleaner' - require 'cucumber/rails' +# database_cleaner is set up manually below +Cucumber::Rails::Database.autorun_database_cleaner = false # require 'capybara/poltergeist' # Capybara.default_driver = :poltergeist @@ -49,36 +48,29 @@ # ActionController::Base.allow_rescue = false -# Remove/comment out the lines below if your app doesn't have a database. -# For some databases (like MongoDB and CouchDB) you may need to use :truncation instead. -begin - DatabaseCleaner.strategy = :transaction -rescue NameError - raise "You need to add database_cleaner to your Gemfile (in the :test group) if you wish to use it." -end +# Setup database_cleaner. +# Ensure we can disable the transaction strategy for tests that spawn other +# processes/threads, otherwise data won't be visible to them. Ideally we'd just use +# before hooks to set the strategy, but they always run after Around hooks (too late). +# There is also no way to get the current strategy to restore it later. -# You may also want to configure DatabaseCleaner to use different strategies for certain features and scenarios. -# See the DatabaseCleaner documentation for details. Example: -# -# Before('@no-txn,@selenium,@culerity,@celerity,@javascript') do -# # { :except => [:widgets] } may not do what you expect here -# # as Cucumber::Rails::Database.javascript_strategy overrides -# # this setting. -# DatabaseCleaner.strategy = :truncation -# end -# -# Before('~@no-txn', '~@selenium', '~@culerity', '~@celerity', '~@javascript') do -# DatabaseCleaner.strategy = :transaction -# end -# +require 'database_cleaner' + +DatabaseCleaner.clean_with(:truncation) + +Around('@no_transaction') do |scenario, block| + DatabaseCleaner.strategy = :truncation + DatabaseCleaner.cleaning(&block) +end -# Possible values are :truncation and :transaction -# The :transaction strategy is faster, but might give you threading problems. -# See https://github.com/cucumber/cucumber-rails/blob/master/features/choose_javascript_database_strategy.feature -Cucumber::Rails::Database.javascript_strategy = :truncation +Around('~@no_transaction') do |scenario, block| + DatabaseCleaner.strategy = :transaction + DatabaseCleaner.cleaning(&block) +end # Kalibro hooks -require 'kalibro_client/kalibro_cucumber_helpers/hooks.rb' +require 'kalibro_client/kalibro_cucumber_helpers/hooks' +KalibroClient::KalibroCucumberHelpers.clean_configurations # Some steps access this module directly. Require it here so we don't need this line in every step definition that uses some class on this module. require 'downloaders' From f299518efd9688b42e9e03329005aa4b3e3d3a3f Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Sat, 16 Jul 2016 09:58:45 -0300 Subject: [PATCH 4/7] Add indexes to foreign key references that are not primary keys PostgreSQL does not create them automatically, but we want them when deletions are cascade to other tables - otherwise we'll have to scan the whole table at that time. Some of the are also used for searching, such as the processing_id in process_times, or the project_id in repositories. --- .../20160720185414_add_indexes_of_foreign_keys.rb | 9 +++++++++ db/schema.rb | 10 +++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 db/migrate/20160720185414_add_indexes_of_foreign_keys.rb diff --git a/db/migrate/20160720185414_add_indexes_of_foreign_keys.rb b/db/migrate/20160720185414_add_indexes_of_foreign_keys.rb new file mode 100644 index 0000000..7c9e79b --- /dev/null +++ b/db/migrate/20160720185414_add_indexes_of_foreign_keys.rb @@ -0,0 +1,9 @@ +class AddIndexesOfForeignKeys < ActiveRecord::Migration + def change + add_index :module_results, :processing_id + add_index :kalibro_modules, :module_result_id + add_index :process_times, :processing_id + add_index :processings, :repository_id + add_index :repositories, :project_id + end +end diff --git a/db/schema.rb b/db/schema.rb index d64edbe..7b11ef6 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20160720185413) do +ActiveRecord::Schema.define(version: 20160720185414) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -41,6 +41,7 @@ end add_index "kalibro_modules", ["long_name", "granularity"], name: "index_kalibro_modules_on_long_name_and_granularity", using: :btree + add_index "kalibro_modules", ["module_result_id"], name: "index_kalibro_modules_on_module_result_id", using: :btree create_table "metric_results", force: :cascade do |t| t.integer "module_result_id" @@ -69,6 +70,7 @@ end add_index "module_results", ["parent_id"], name: "index_module_results_on_parent_id", using: :btree + add_index "module_results", ["processing_id"], name: "index_module_results_on_processing_id", using: :btree create_table "process_times", force: :cascade do |t| t.string "state", limit: 255 @@ -78,6 +80,8 @@ t.float "time" end + add_index "process_times", ["processing_id"], name: "index_process_times_on_processing_id", using: :btree + create_table "processings", force: :cascade do |t| t.string "state", limit: 255 t.integer "repository_id" @@ -87,6 +91,8 @@ t.text "error_message" end + add_index "processings", ["repository_id"], name: "index_processings_on_repository_id", using: :btree + create_table "projects", force: :cascade do |t| t.string "name", limit: 255 t.string "description", limit: 255 @@ -112,6 +118,8 @@ t.string "branch", default: "master", null: false end + add_index "repositories", ["project_id"], name: "index_repositories_on_project_id", using: :btree + add_foreign_key "kalibro_modules", "module_results", on_delete: :cascade add_foreign_key "metric_results", "module_results", on_delete: :cascade add_foreign_key "metric_results", "related_hotspot_metric_results", column: "related_hotspot_metric_results_id" From 6a1a8f77920f426cb03c632db350cc1d53bb4610 Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Sat, 16 Jul 2016 09:59:32 -0300 Subject: [PATCH 5/7] Fix missing mock in RepositoriesController test A Processing is created unconditionally, and therefore, always needs to be mocked, even when an error will immediately be thrown. --- spec/controllers/repositories_controller_spec.rb | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spec/controllers/repositories_controller_spec.rb b/spec/controllers/repositories_controller_spec.rb index a48f08c..c7e2506 100644 --- a/spec/controllers/repositories_controller_spec.rb +++ b/spec/controllers/repositories_controller_spec.rb @@ -173,9 +173,9 @@ end describe 'process' do - context 'with a successful processing' do - let!(:processing) { FactoryGirl.build(:processing) } + let(:processing) { FactoryGirl.build(:processing) } + context 'with a successful processing' do before :each do Processing.expects(:create).with(repository: repository, state: "PREPARING").returns(processing) Repository.expects(:find).with(repository.id).returns(repository) @@ -189,6 +189,7 @@ context 'with an unsuccessful processing' do before :each do + Processing.expects(:create).with(repository: repository, state: "PREPARING").returns(processing) Repository.expects(:find).with(repository.id).returns(repository) repository.expects(:process).raises(Errors::ProcessingError) From a57e571ebe84f07ea4c9ddd0bcd19837cc3b1983 Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Mon, 18 Jul 2016 07:37:15 -0300 Subject: [PATCH 6/7] Fix missing mock in ProcessingJob spec --- spec/jobs/processing_job_spec.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/jobs/processing_job_spec.rb b/spec/jobs/processing_job_spec.rb index bad0382..49e12fd 100644 --- a/spec/jobs/processing_job_spec.rb +++ b/spec/jobs/processing_job_spec.rb @@ -29,6 +29,8 @@ Processor::Aggregator.expects(:perform) Processor::CompoundResultCalculator.expects(:perform) Processor::Interpreter.expects(:perform) + + periodic_processing.expects(:update).with(state: "READY") end it 'should process the repository periodically' do From a8777c6f35de6276437ebd318b3496726a05956b Mon Sep 17 00:00:00 2001 From: Daniel Miranda Date: Thu, 28 Jul 2016 14:58:56 -0300 Subject: [PATCH 7/7] Make backups in CleanInconsistencies migration --- .../20160720185407_clean_inconsistencies.rb | 118 +++++++++++------- 1 file changed, 74 insertions(+), 44 deletions(-) diff --git a/db/migrate/20160720185407_clean_inconsistencies.rb b/db/migrate/20160720185407_clean_inconsistencies.rb index 8732cf8..d99fd20 100644 --- a/db/migrate/20160720185407_clean_inconsistencies.rb +++ b/db/migrate/20160720185407_clean_inconsistencies.rb @@ -1,5 +1,34 @@ +require 'fileutils' + class CleanInconsistencies < ActiveRecord::Migration - def self.up + def backup_dir + return @backup_dir if @backup_dir + @backup_dir = Rails.root.join('db', 'backup', Time.now.strftime('%Y-%m-%d_%H-%M-%S')) + FileUtils.mkdir_p(@backup_dir) + end + + def backup(name, query, header: true) + say_with_time("backup(#{name.inspect}, #{query.inspect})") do + copy_query = "COPY (#{query}) TO STDOUT WITH DELIMITER ',' CSV #{header ? 'HEADER' : ''}" + + File.open(backup_dir.join("#{name}.csv"), 'a') do |f| + connection.raw_connection.copy_data(copy_query) do + while line = connection.raw_connection.get_copy_data + f.write(line) + end + end + end + end + end + + def backup_and_delete_missing(table, exists_query) + backup(table, "SELECT * FROM \"#{table}\" WHERE NOT EXISTS(#{exists_query})") + execute "DELETE FROM \"#{table}\" WHERE NOT EXISTS(#{exists_query})" + end + + def up + say "WARNING: destructive migration necessary. Deleted data will be backed up to #{backup_dir}" + # Unset project reference for repositories with non-existing projects execute <<-SQL UPDATE repositories AS r @@ -10,61 +39,62 @@ def self.up SQL # Delete processings with non-existing repositories - execute <<-SQL - DELETE FROM processings AS p - WHERE NOT EXISTS( - SELECT 1 FROM repositories AS r WHERE r.id = p.repository_id - ) - SQL + backup_and_delete_missing("processings", + "SELECT 1 FROM repositories AS r WHERE r.id = processings.repository_id") # Delete process times with non-existing processings - execute <<-SQL - DELETE FROM process_times AS t - WHERE NOT EXISTS ( - SELECT 1 FROM processings AS p WHERE p.id = t.processing_id - ) - SQL + backup_and_delete_missing("process_times", + "SELECT 1 FROM processings AS p WHERE p.id = process_times.processing_id") # Delete module results with non-existing processings - execute <<-SQL - DELETE FROM module_results AS m - WHERE NOT EXISTS ( - SELECT 1 FROM processings AS p WHERE p.id = m.processing_id - ) - SQL + backup_and_delete_missing("module_results", + "SELECT 1 FROM processings AS p WHERE p.id = module_results.processing_id") # Delete kalibro modules with non-existing module results + backup_and_delete_missing("kalibro_modules", + "SELECT 1 FROM module_results AS m WHERE m.id = kalibro_modules.module_result_id") + + # Fix up metric results type, even before backing up so the backup is cleaner execute <<-SQL - DELETE FROM kalibro_modules AS k - WHERE NOT EXISTS ( - SELECT 1 FROM module_results AS m WHERE m.id = k.module_result_id - ) + UPDATE metric_results SET "type" = 'TreeMetricResult' WHERE "type" = 'MetricResult' SQL # Delete metric results with non-existing module results - execute <<-SQL - DELETE FROM metric_results AS met - WHERE NOT EXISTS ( - SELECT 1 FROM module_results AS mod WHERE mod.id = met.module_result_id - ) - SQL + backup_and_delete_missing("metric_results", + "SELECT 1 FROM module_results AS m WHERE m.id = metric_results.module_result_id") - # Delete duplicate metric_results. Group them by (module_result, metric_configuration), - # then delete all but the one with the highest ID - # The double wrapping on the inner query is necessary because window functions - # cannot be used in WHERE in PostgreSQL. - execute <<-SQL - DELETE FROM metric_results - WHERE id IN ( - SELECT t.id FROM ( - SELECT id, ROW_NUMBER() OVER (PARTITION BY module_result_id, metric_configuration_id, "type" - ORDER BY id DESC) AS rnum - FROM metric_results - WHERE "type" = 'TreeMetricResult' - ) AS t - WHERE t.rnum > 1 - ) + # Delete duplicate metric_results. Group them by (module_result_id, metric_configuration_id), + # then delete all but the one with the highest ID. The double wrapping on the inner query is + # necessary because window functions cannot be used in WHERE in PostgreSQL. + repeated_metric_result_query = exec_query <<-SQL + SELECT t.id FROM ( + SELECT metric_results.*, ROW_NUMBER() OVER ( + PARTITION BY module_result_id, metric_configuration_id, "type" + ORDER BY id DESC) AS rnum + FROM metric_results + WHERE "type" = 'TreeMetricResult' + ) AS t + WHERE t.rnum > 1 SQL + + unless repeated_metric_result_query.empty? + repeated_metric_result_ids = repeated_metric_result_query.rows.flat_map(&:first).join(',') + + # Replace default messages with custom ones to avoid flooding the screen with the huge query + say_with_time('backup("metric_results", "SELECT * metric_results WHERE id IN (...)")') do + suppress_messages do + backup('metric_results', + "SELECT * FROM metric_results WHERE id IN (#{repeated_metric_result_ids})", + header: false) + end + end + + say_with_time('execute("DELETE FROM metric_results WHERE id IN (...)")') do + suppress_messages do + execute "DELETE FROM metric_results WHERE id IN (#{repeated_metric_result_ids})" + end + end + end end def self.down