Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import com.linkedin.photon.ml.TaskType
import com.linkedin.photon.ml.Types.REId
import com.linkedin.photon.ml.data.{FixedEffectDataset, LocalDataset, RandomEffectDataset}
import com.linkedin.photon.ml.function.{DistributedObjectiveFunction, ObjectiveFunctionHelper, SingleNodeObjectiveFunction}
import com.linkedin.photon.ml.model.{FixedEffectModel, RandomEffectModel}
import com.linkedin.photon.ml.normalization.NormalizationContext
import com.linkedin.photon.ml.optimization.game.{FixedEffectOptimizationConfiguration, RandomEffectOptimizationConfiguration}
import com.linkedin.photon.ml.optimization.{OptimizerConfig, OptimizerType, SingleNodeOptimizationProblem, VarianceComputationType}
Expand All @@ -46,6 +47,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {

val mockDataset = mock(classOf[FixedEffectDataset])
val optimizationConfiguration = FixedEffectOptimizationConfiguration(OPTIMIZER_CONFIG)
val priorModelOpt: Option[FixedEffectModel] = None

doReturn(sc).when(mockDataset).sparkContext

Expand All @@ -57,6 +59,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
DOWN_SAMPLER_FACTORY,
MOCK_NORMALIZATION,
VARIANCE_COMPUTATION_TYPE,
priorModelOpt,
INTERCEPT_INDEX)

coordinate match {
Expand All @@ -78,8 +81,10 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
val mockProjectorsRDD = mock(classOf[RDD[(REId, LinearSubspaceProjector)]])
val mockProblemsRDD = mock(classOf[RDD[(REId, SingleNodeOptimizationProblem[SingleNodeObjectiveFunction])]])
val optimizationConfiguration = RandomEffectOptimizationConfiguration(OPTIMIZER_CONFIG)
val priorModelOpt: Option[RandomEffectModel] = None

doReturn(sc).when(mockDataset).sparkContext
doReturn(sc).when(mockProjectorsRDD).sparkContext
doReturn(mockDataRDD).when(mockDataset).activeData
doReturn(mockDataRDD)
.when(mockDataRDD)
Expand All @@ -97,6 +102,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
DOWN_SAMPLER_FACTORY,
MOCK_NORMALIZATION,
VARIANCE_COMPUTATION_TYPE,
priorModelOpt,
INTERCEPT_INDEX)

coordinate match {
Expand Down Expand Up @@ -124,6 +130,7 @@ class CoordinateFactoryIntegTest extends SparkTestUtils {
DOWN_SAMPLER_FACTORY,
MOCK_NORMALIZATION,
VARIANCE_COMPUTATION_TYPE,
None,
INTERCEPT_INDEX)
}
}
Expand All @@ -139,7 +146,7 @@ object CoordinateFactoryIntegTest {
private val INTERCEPT_INDEX = None

private val OPTIMIZER_CONFIG = OptimizerConfig(OPTIMIZER_TYPE, MAX_ITER, TOLERANCE)
private val MOCK_NORMALIZATION = mock(classOf[NormalizationContext])
private val MOCK_NORMALIZATION = mock(classOf[NormalizationContext], withSettings().serializable())

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was the reason for this change?

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The integration test throws an error that MOCK_NORMALIZATION is not serializable. The original class, NormalizationContext, extends Serializable, so the mock needs to be created as serializable as well.

private val GLM_CONSTRUCTOR = LogisticRegressionModel.apply _
private val LOSS_FUNCTION_FACTORY = ObjectiveFunctionHelper.buildFactory(TRAINING_TASK, TREE_AGGREGATE_DEPTH)
private val DOWN_SAMPLER_FACTORY = DownSamplerHelper.buildFactory(TRAINING_TASK)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ package com.linkedin.photon.ml.algorithm
import com.linkedin.photon.ml.data.{Dataset, FixedEffectDataset, RandomEffectDataset}
import com.linkedin.photon.ml.function.ObjectiveFunctionHelper.{DistributedObjectiveFunctionFactory, ObjectiveFunctionFactoryFactory, SingleNodeObjectiveFunctionFactory}
import com.linkedin.photon.ml.function.ObjectiveFunction
import com.linkedin.photon.ml.model.Coefficients
import com.linkedin.photon.ml.model.{Coefficients, DatumScoringModel, FixedEffectModel, RandomEffectModel}
import com.linkedin.photon.ml.normalization.NormalizationContext
import com.linkedin.photon.ml.optimization.DistributedOptimizationProblem
import com.linkedin.photon.ml.optimization.VarianceComputationType.VarianceComputationType
Expand Down Expand Up @@ -45,6 +45,7 @@ object CoordinateFactory {
* @param downSamplerFactory A factory function for the [[DownSampler]] (if down-sampling is enabled)
* @param normalizationContext The [[NormalizationContext]]
* @param varianceComputationType Should the trained coefficient variances be computed in addition to the means?
* @param priorModelOpt The prior model for warm-start and incremental training
* @param interceptIndexOpt The index of the intercept, if one is present
* @return A [[Coordinate]] for the [[Dataset]] of type [[D]]
*/
Expand All @@ -56,15 +57,17 @@ object CoordinateFactory {
downSamplerFactory: DownSamplerFactory,
normalizationContext: NormalizationContext,
varianceComputationType: VarianceComputationType,
priorModelOpt: Option[DatumScoringModel],
interceptIndexOpt: Option[Int]): Coordinate[D] = {

val lossFunctionFactory = lossFunctionFactoryConstructor(coordinateOptConfig)

(dataset, coordinateOptConfig, lossFunctionFactory) match {
(dataset, coordinateOptConfig, lossFunctionFactory, priorModelOpt) match {
case (
fEDataset: FixedEffectDataset,
fEOptConfig: FixedEffectOptimizationConfiguration,
distributedLossFunctionFactory: DistributedObjectiveFunctionFactory) =>
fEDataset: FixedEffectDataset,
fEOptConfig: FixedEffectOptimizationConfiguration,
distributedLossFunctionFactory: DistributedObjectiveFunctionFactory,
fixedEffectModelOpt: Option[FixedEffectModel]) =>

val downSamplerOpt = if (DownSampler.isValidDownSamplingRate(fEOptConfig.downSamplingRate)) {
Some(downSamplerFactory(fEOptConfig.downSamplingRate))
Expand All @@ -77,21 +80,23 @@ object CoordinateFactory {
fEDataset,
DistributedOptimizationProblem(
fEOptConfig,
distributedLossFunctionFactory(interceptIndexOpt),
distributedLossFunctionFactory(fixedEffectModelOpt.map(_.model), interceptIndexOpt),
downSamplerOpt,
glmConstructor,
normalizationPhotonBroadcast,
varianceComputationType)).asInstanceOf[Coordinate[D]]

case (
rEDataset: RandomEffectDataset,
rEOptConfig: RandomEffectOptimizationConfiguration,
singleNodeLossFunctionFactory: SingleNodeObjectiveFunctionFactory) =>
rEDataset: RandomEffectDataset,
rEOptConfig: RandomEffectOptimizationConfiguration,
singleNodeLossFunctionFactory: SingleNodeObjectiveFunctionFactory,
randomEffectModelOpt: Option[RandomEffectModel]) =>

RandomEffectCoordinate(
rEDataset,
rEOptConfig,
singleNodeLossFunctionFactory,
randomEffectModelOpt,
glmConstructor,
normalizationContext,
varianceComputationType,
Expand All @@ -100,9 +105,10 @@ object CoordinateFactory {
case _ =>
throw new UnsupportedOperationException(
s"""Cannot build coordinate for the following input class combination:
| ${dataset.getClass.getName}
| ${coordinateOptConfig.getClass.getName}
| ${lossFunctionFactory.getClass.getName}""".stripMargin)
| ${dataset.getClass.getName}
| ${coordinateOptConfig.getClass.getName}
| ${lossFunctionFactory.getClass.getName}
| ${priorModelOpt.getClass.getName}""".stripMargin)
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,7 @@ protected[ml] class RandomEffectCoordinate[Objective <: SingleNodeObjectiveFunct
* @param model The model to use as a starting point
* @return A (updated model, optional optimization tracking information) tuple
*/
override protected[algorithm] def trainModel(
model: DatumScoringModel): (DatumScoringModel, OptimizationTracker) =
override protected[algorithm] def trainModel(model: DatumScoringModel): (DatumScoringModel, OptimizationTracker) =

model match {
case randomEffectModel: RandomEffectModel =>
Expand Down Expand Up @@ -183,17 +182,19 @@ object RandomEffectCoordinate {
* problems
* @param randomEffectDataset The data on which to run the optimization algorithm
* @param configuration The optimization problem configuration
* @param objectiveFunctionFactory The objective function to optimize
* @param objectiveFunctionFactory The factory function used to build the objective function to optimize
* @param priorRandomEffectModelOpt The prior [[RandomEffectModel]], if one is available
* @param glmConstructor The function to use for producing GLMs from trained coefficients
* @param normalizationContext The normalization context
* @param varianceComputationType If and how coefficient variances should be computed
* @param interceptIndexOpt The index of the intercept, if there is one
* @return A new [[RandomEffectCoordinate]] object
* @return A new [[RandomEffectCoordinate]]
*/
protected[ml] def apply[RandomEffectObjective <: SingleNodeObjectiveFunction](
randomEffectDataset: RandomEffectDataset,
configuration: RandomEffectOptimizationConfiguration,
objectiveFunctionFactory: Option[Int] => RandomEffectObjective,
objectiveFunctionFactory: (Option[GeneralizedLinearModel], Option[Int]) => RandomEffectObjective,
priorRandomEffectModelOpt: Option[RandomEffectModel],
glmConstructor: Coefficients => GeneralizedLinearModel,
normalizationContext: NormalizationContext,
varianceComputationType: VarianceComputationType = VarianceComputationType.NONE,
Expand All @@ -204,6 +205,7 @@ object RandomEffectCoordinate {
randomEffectDataset.projectors,
configuration,
objectiveFunctionFactory,
priorRandomEffectModelOpt,
glmConstructor,
normalizationContext,
varianceComputationType,
Expand Down
Loading