diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPExecutionStage.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPExecutionStage.java new file mode 100644 index 000000000..315b71f09 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPExecutionStage.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.giraph.block_app.library.partitioning;
+
+import org.apache.giraph.block_app.library.iteration.IterationStage;
+
+/**
+ * Interface for execution stage for SHP calculation.
+ *
+ * Extends IterationStage with the SHP-specific state carried between pieces:
+ * number of splits performed so far, current number of buckets, and the
+ * iteration at which the last split happened. The changedX methods return a
+ * modified copy (the provided implementation is immutable).
+ */
+public interface SHPExecutionStage extends IterationStage {
+  /** Returns a copy of this stage with the iteration counter replaced. */
+  @Override
+  SHPExecutionStage changedIteration(int iteration);
+  /** Number of splits performed so far. */
+  int getSplits();
+  /** Returns a copy of this stage with the split counter replaced. */
+  SHPExecutionStage changedSplits(int splits);
+  /** Current number of buckets vertices can be assigned to. */
+  int getNumBuckets();
+  /** Returns a copy of this stage with the bucket count replaced. */
+  SHPExecutionStage changedNumBuckets(int numBuckets);
+  /** Iteration at which the most recent split was performed. */
+  int getLastSplitIteration();
+  /** Returns a copy of this stage with the last-split iteration replaced. */
+  SHPExecutionStage changedLastSplitIteration(int iteration);
+  /** Number of iterations executed since the most recent split. */
+  int getIterationsFromSplit();
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPExecutionStageImpl.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPExecutionStageImpl.java
new file mode 100644
index 000000000..d76a04d3a
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPExecutionStageImpl.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+package org.apache.giraph.block_app.library.partitioning;
+
+/**
+ * Execution stage for SHP calculation.
+ *
+ * Immutable value object - every changedX method returns a new instance
+ * with a single field replaced and all other fields copied.
+ */
+public class SHPExecutionStageImpl implements SHPExecutionStage {
+  /** Current iteration counter. */
+  private final int iteration;
+  /** Number of splits performed so far. */
+  private final int splits;
+  /** Current number of buckets. */
+  private final int numBuckets;
+  /** Iteration at which the most recent split happened. */
+  private final int lastSplitIteration;
+
+  /** Creates the initial stage, with all counters at zero. */
+  public SHPExecutionStageImpl() {
+    this.iteration = 0;
+    this.splits = 0;
+    this.numBuckets = 0;
+    this.lastSplitIteration = 0;
+  }
+
+  public SHPExecutionStageImpl(
+      int iteration, int splits, int numBuckets, int lastSplitIteration) {
+    this.iteration = iteration;
+    this.splits = splits;
+    this.numBuckets = numBuckets;
+    this.lastSplitIteration = lastSplitIteration;
+  }
+
+  @Override
+  public int getIteration() {
+    return iteration;
+  }
+
+  @Override
+  public int getSplits() {
+    return splits;
+  }
+
+  @Override
+  public int getNumBuckets() {
+    return numBuckets;
+  }
+
+  @Override
+  public SHPExecutionStageImpl changedIteration(int iteration) {
+    return new SHPExecutionStageImpl(
+        iteration, this.splits, this.numBuckets, this.lastSplitIteration);
+  }
+
+  @Override
+  public SHPExecutionStageImpl changedSplits(int splits) {
+    return new SHPExecutionStageImpl(
+        this.iteration, splits, this.numBuckets, this.lastSplitIteration);
+  }
+
+  @Override
+  public SHPExecutionStageImpl changedNumBuckets(int numBuckets) {
+    return new SHPExecutionStageImpl(
+        this.iteration, this.splits, numBuckets, this.lastSplitIteration);
+  }
+
+  @Override
+  public int getLastSplitIteration() {
+    return lastSplitIteration;
+  }
+
+  @Override
+  public SHPExecutionStage changedLastSplitIteration(int lastSplitIteration) {
+    return new SHPExecutionStageImpl(
+        this.iteration, this.splits, this.numBuckets, lastSplitIteration);
+  }
+
+  @Override
+  public int getIterationsFromSplit() {
+    return this.iteration - this.lastSplitIteration;
+  }
+
+  @Override
+  public String toString() {
+    // Fixed: previously printed "BPExecutionStage" and "numPartitions",
+    // which matched neither this class's name nor its numBuckets field.
+    return "SHPExecutionStage [iteration=" + iteration +
+        ", iterationsFromSplit=" + getIterationsFromSplit() +
+        ", splits=" + splits + ", numBuckets=" + numBuckets + "]";
+  }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPLoggingBuilder.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPLoggingBuilder.java
new file mode 100644
index 000000000..7caafa2ac
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SHPLoggingBuilder.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.block_app.library.partitioning;
+
+import org.apache.giraph.block_app.framework.api.BlockMasterApi;
+import org.apache.log4j.Logger;
+
+/** Logging utility */
+public class SHPLoggingBuilder {
+  // Fixed: the logger was created with DirectedGraphStats.class (copy-paste
+  // from another class), so log lines were attributed to the wrong logger
+  // category. Use this class; the now-unused DirectedGraphStats import was
+  // removed.
+  private static final Logger LOG = Logger.getLogger(SHPLoggingBuilder.class);
+
+  private final StringBuilder sb = new StringBuilder();
+
+  /** Appends a line, prefixed with a "M> " marker, to the buffered output. */
+  public SHPLoggingBuilder appendLine(String line) {
+    sb.append("\nM> " + line);
+    return this;
+  }
+
+  /** Appends a formatted line (String.format semantics). */
+  public SHPLoggingBuilder appendLine(String format, Object... args) {
+    appendLine(String.format(format, args));
+    return this;
+  }
+
+  /** Flushes the buffered lines to both the log and the command line. */
+  public void logToCommandLine(BlockMasterApi masterApi) {
+    LOG.info(sb);
+    masterApi.logToCommandLine(sb.toString());
+  }
+
+  /** Sets a per-iteration/per-split long-valued Hadoop counter. */
+  public static void setCounter(
+      String counterName, long value,
+      BlockMasterApi masterApi, SHPExecutionStage executionStage) {
+    masterApi.getCounter(
+        "SocialHashPartitioner Stats",
+        String.format(counterName + " in %6d iteration after %d splits",
+            executionStage.getIteration(), executionStage.getSplits())
+    ).setValue(value);
+  }
+
+  /**
+   * Sets a double-valued counter; counters are long-only, so the value is
+   * stored scaled by 1000 (hence the "(*K)" suffix in the counter name).
+   */
+  public static void setCounter(
+      String counterName, double value,
+      BlockMasterApi masterApi, SHPExecutionStage executionStage) {
+    masterApi.getCounter(
+        "SocialHashPartitioner Stats",
+        String.format(counterName + " in %6d iteration after %d splits (*K)",
+            executionStage.getIteration(), executionStage.getSplits())
+    ).setValue((long) (value * 1000));
+  }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerBlockFactory.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerBlockFactory.java
new file mode 100644
index 000000000..314f6329b
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerBlockFactory.java
@@ -0,0 +1,318 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.giraph.block_app.framework.AbstractBlockFactory; +import org.apache.giraph.block_app.framework.block.Block; +import org.apache.giraph.block_app.framework.block.EmptyBlock; +import org.apache.giraph.block_app.framework.block.RepeatBlock; +import org.apache.giraph.block_app.framework.block.RepeatUntilBlock; +import org.apache.giraph.block_app.framework.block.SequenceBlock; +import org.apache.giraph.block_app.library.Pieces; +import org.apache.giraph.block_app.library.VertexSuppliers; +import org.apache.giraph.block_app.library.iteration.IterationCounterPiece; +import org.apache.giraph.block_app.library.partitioning.assignment.AllToSameBucketAssigner; +import org.apache.giraph.block_app.library.partitioning.assignment.BucketAssigner; +import org.apache.giraph.block_app.library.partitioning.assignment.RandomBucketAssigner; +import org.apache.giraph.block_app.library.partitioning.assignment.SHPInitializePiece; +import org.apache.giraph.block_app.library.partitioning.confs.SHPPaperConfs; +import org.apache.giraph.block_app.library.partitioning.decide.SHPDecideUtils; +import org.apache.giraph.block_app.library.partitioning.recursive.RecursiveSettings; +import org.apache.giraph.block_app.library.partitioning.recursive.SHPSplitPiece; +import org.apache.giraph.block_app.library.partitioning.vertex.CachedNeighborData; +import 
org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue;
+import org.apache.giraph.block_app.library.prepare_graph.PrepareGraphPieces;
+import org.apache.giraph.block_app.library.stats.PartitioningStats;
+import org.apache.giraph.conf.GiraphConfiguration;
+import org.apache.giraph.conf.GiraphConstants;
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.function.Consumer;
+import org.apache.giraph.function.Function;
+import org.apache.giraph.function.ObjectTransfer;
+import org.apache.giraph.function.vertex.SupplierFromVertex;
+import org.apache.giraph.types.ops.LongTypeOps;
+import org.apache.giraph.types.ops.collections.ResettableIterator;
+import org.apache.giraph.types.ops.collections.array.WLongArrayList;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+
+import com.google.common.base.Preconditions;
+
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+
+/**
+ * Main entry point to Social Hash Partitioner.
+ *
+ * Overall flow (see createBlock): prepare graph (optionally drop standalone
+ * vertices, dedupe edges, mark vertices as data), initialize bucket
+ * assignments, run splitting iterations, then compute end statistics.
+ *
+ * NOTE(review): generic type parameters (e.g. on AbstractBlockFactory,
+ * SupplierFromVertex, Consumer, Class, Edge) appear to have been stripped
+ * from this patch text by a text pipeline - restore them before applying.
+ */
+public class SocialHashPartitionerBlockFactory
+    extends AbstractBlockFactory {
+
+  @Override
+  public Block createBlock(GiraphConfiguration conf) {
+    Block prepareGraphBlock = new SequenceBlock(
+        SocialHashPartitionerSettings.KEEP_STANDALONE_VERTICES.get(conf) ?
+            new EmptyBlock() : PrepareGraphPieces.removeStandAloneVertices(),
+        PrepareGraphPieces.removeDuplicateEdges(LongTypeOps.INSTANCE),
+        // Flag every remaining vertex as one that should receive a bucket,
+        // and size its useless-edges set to its current degree.
+        Pieces.
+        forAllVertices(
+            "mark_as_data",
+            vertex -> {
+              vertex.getValue().setShouldVertexBeAssignedBucket(true);
+              vertex.getValue().initUselessEdgesSet(vertex.getNumEdges());
+            })
+    );
+
+    // initialize
+    RecursiveSettings recursive =
+        SocialHashPartitionerSettings.RECURSIVE.createObject(conf);
+    Block initializeBlock = createInitializeBlock(conf, recursive);
+
+    // iterations
+    int numIterationsPerSplit =
+        SocialHashPartitionerSettings.NUM_ITERATIONS_PER_SPLIT.get(conf);
+    Block iterationsBlock = createSplittingIterationsBlock(
+        conf,
+        numIterationsPerSplit,
+        recursive
+    );
+
+    return new SequenceBlock(
+        prepareGraphBlock,
+        initializeBlock,
+        iterationsBlock,
+        calcEndStatsBlock(conf, recursive)
+    );
+  }
+
+  /**
+   * Final statistics: fanout and imbalance over the final bucket assignment.
+   * Vertices that were never assigned a bucket (or were not flagged for
+   * assignment) are excluded via the null-returning supplier.
+   */
+  private Block calcEndStatsBlock(
+      GiraphConfiguration conf, RecursiveSettings recursive) {
+    SupplierFromVertex bucketSupplier =
+        vertex -> {
+          if (vertex.getValue().getShouldVertexBeAssignedBucket()) {
+            if (vertex.getValue().getCurrentBucket() !=
+                SocialHashPartitionerVertexValue.BUCKET_NOT_SET) {
+              return new LongWritable(vertex.getValue().getCurrentBucket());
+            }
+          }
+          return null;
+        };
+    return new SequenceBlock(
+        PartitioningStats.calculateFanout(bucketSupplier, null),
+        PartitioningStats.calculateImbalance(
+            recursive.getFinalNumBuckets(), bucketSupplier, null)
+    );
+  }
+
+  /** Initial bucket assignment followed by a move block. */
+  protected static Block createInitializeBlock(
+      GiraphConfiguration conf, RecursiveSettings recursive) {
+    return moveAfterBlock(
+        createBucketAssigner(
+            conf,
+            (bucketAssigner) -> SHPInitializePiece.create(
+                recursive.getInitialNumBuckets(), bucketAssigner)
+        ),
+        conf,
+        null
+    );
+  }
+
+  /**
+   * Runs the given block, then the move block of the configured optimization
+   * goal. converged (may be null) receives the convergence signal.
+   */
+  public static Block moveAfterBlock(
+      Block block, GiraphConfiguration conf, Consumer converged) {
+    return new SequenceBlock(
+        block,
+        SocialHashPartitionerSettings.OPTIMIZE_FOR.get(conf).createMoveBlock(
+            conf, converged)
+    );
+  }
+
+  /**
+   * Dispatches on the ASSIGN_WITH option to build a block around the chosen
+   * BucketAssigner. Supported values: "Random", "AllToSame"; anything else
+   * throws IllegalArgumentException.
+   */
+  public static
+  Block createBucketAssigner(
+      Configuration conf,
+      Function, Block> assignBlock
+  ) {
+    String assignWith = SocialHashPartitionerSettings.ASSIGN_WITH.get(conf);
+    switch (assignWith) {
+    case "Random":
+      return assignBlock.apply(new RandomBucketAssigner<>());
+    case "AllToSame":
+      return assignBlock.apply(new AllToSameBucketAssigner<>());
+//    case "MinHash":
+//      return MinHashInitializer.createAssignBlock(assignBlock, conf);
+//    case "BFS":
+//      return BFSInitializerForBalancedPartitioning.createAssignBlock(
+//          assignBlock, conf, SocialHashSettings.getNumFinalBuckets(conf));
+    default:
+      throw new IllegalArgumentException(
+          "Unknown bucket assigner option specified - " + assignWith);
+    }
+  }
+
+  /**
+   * Factory which creates set of iterations of balance partitioning,
+   * interleaved with set of splits.
+   */
+  public static Block createSplittingIterationsBlock(
+      GiraphConfiguration conf,
+      int numIterationsPerSplit,
+      RecursiveSettings recursiveSettings
+  ) {
+    int numSplits = recursiveSettings.getNumSplits();
+
+    List iterationsBlocks = new ArrayList<>();
+    if (recursiveSettings.runIterationsBeforeFirstSplit()) {
+      iterationsBlocks.add(
+          createIterationsBlock(conf, numIterationsPerSplit, recursiveSettings)
+      );
+    }
+
+    if (numSplits > 0) {
+      Preconditions.checkState(recursiveSettings.isRecursiveSplitting());
+      // Each repetition: prune "useless" edges, split buckets (with a move
+      // block), then run a batch of balancing iterations.
+      iterationsBlocks.add(new RepeatBlock(
+          numSplits,
+          new SequenceBlock(
+              markUselessEdges(),
+              moveAfterBlock(
+                  createBucketAssigner(
+                      conf,
+                      (bucketAssigner) -> new SHPSplitPiece<>(
+                          recursiveSettings, bucketAssigner)),
+                  conf,
+                  null),
+              createIterationsBlock(
+                  conf,
+                  numIterationsPerSplit,
+                  recursiveSettings)
+          )
+      ));
+    }
+    return new SequenceBlock(iterationsBlocks);
+  }
+
+  /**
+   * Creates set of iterations of balance partitioning.
+   * Repeats a decide+move+counter sequence up to numIterations times, or
+   * until the converged signal (fed by the move block) stops it early.
+   */
+  public static Block createIterationsBlock(
+      GiraphConfiguration conf, int numIterations,
+      RecursiveSettings recursiveSettings) {
+    ObjectTransfer converged = new ObjectTransfer<>();
+    Block iterationsBlock = new SequenceBlock(
+        new RepeatUntilBlock(
+            numIterations,
+            new SequenceBlock(
+                moveAfterBlock(
+                    SHPDecideUtils.createDecideBlock(conf, recursiveSettings),
+                    conf, converged),
+                new IterationCounterPiece()
+            ),
+            converged
+        )
+    );
+
+    return iterationsBlock;
+  }
+
+  /**
+   * Marks edges that cannot affect the objective as "useless".
+   * Each vertex scans its cached neighbor data (grouped in runs by bucket);
+   * a neighbor that is alone in its bucket run gets a message, and receivers
+   * mark the corresponding edges as useless.
+   * NOTE(review): targets/targetsIter are shared across the lambda calls on
+   * a worker - presumably pieces run vertex sends single-threaded per
+   * instance; confirm against framework threading.
+   */
+  public static Block markUselessEdges() {
+    WLongArrayList targets = new WLongArrayList();
+    ResettableIterator targetsIter = targets.fastIteratorW();
+    return Pieces.
+    sendMessage(
+        "MarkUselessEdges",
+        LongWritable.class,
+        VertexSuppliers.vertexIdSupplier(),
+        (vertex) -> {
+          targets.clear();
+          CachedNeighborData neighbors =
+              vertex.getValue().updateNeighborData(
+                  Collections.emptyIterator());
+          if (neighbors != null) {
+            int start = 0;
+            // Walk runs of equal-bucket neighbors; singleton runs are
+            // isolated neighbors.
+            while (start < neighbors.size()) {
+              int end = start + 1;
+              while (end < neighbors.size() &&
+                  neighbors.getBucket(start) == neighbors.getBucket(end)) {
+                end++;
+              }
+              if (end == start + 1) {
+                targets.add(neighbors.getId(start));
+              }
+              start = end;
+            }
+            vertex.getValue().incrementRemovedIsolatedNeighbors(
+                targets.size());
+            targetsIter.reset();
+            return targetsIter;
+          } else {
+            return null;
+          }
+        },
+        (vertex, messages) -> {
+          LongOpenHashSet toRemove = new LongOpenHashSet();
+          for (LongWritable message : messages) {
+            toRemove.add(message.get());
+          }
+          int index = 0;
+          for (Edge edge : vertex.getEdges()) {
+            if (toRemove.contains(edge.getTargetVertexId().get())) {
+              vertex.getValue().markEdgeAsUseless(index);
+            }
+            index++;
+          }
+        });
+  }
+
+  @Override
+  public SHPExecutionStage createExecutionStage(GiraphConfiguration conf) {
+    return new SHPExecutionStageImpl();
+  }
+
+  @Override
+  protected Class getVertexIDClass(
+      GiraphConfiguration conf) {
+    return LongWritable.class;
+  }
+
+  @Override
+  protected Class getVertexValueClass(
+      GiraphConfiguration conf) {
+    // Vertex value type is chosen by the configured optimization goal.
+    return SocialHashPartitionerSettings.OPTIMIZE_FOR.get(conf)
+        .getVertexValueClass(conf);
+  }
+
+  @Override
+  protected Class getEdgeValueClass(
+      GiraphConfiguration conf) {
+    return NullWritable.class;
+  }
+
+  @Override
+  protected void additionalInitConfig(GiraphConfiguration conf) {
+    GiraphConstants.RESOLVER_CREATE_VERTEX_ON_MSGS.setIfUnset(conf, true);
+  }
+
+  @Override
+  protected String[] getConvenienceConfiguratorPackages() {
+    return new String[] { SHPPaperConfs.class.getPackage().getName() };
+  }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerSettings.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerSettings.java
new file mode 100644
index 000000000..ad9737163
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerSettings.java
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+package org.apache.giraph.block_app.library.partitioning;
+
+import org.apache.giraph.block_app.library.partitioning.goal.SHPOptimizationGoals;
+import org.apache.giraph.block_app.library.partitioning.recursive.RecursiveSettings;
+import org.apache.giraph.compiling.LambdaConfOption;
+import org.apache.giraph.compiling.ObjectInitializerConfOption;
+import org.apache.giraph.conf.BooleanConfOption;
+import org.apache.giraph.conf.EnumConfOption;
+import org.apache.giraph.conf.FloatConfOption;
+import org.apache.giraph.conf.IntConfOption;
+import org.apache.giraph.conf.StrConfOption;
+import org.apache.giraph.function.primitive.Obj2FloatFunction;
+
+import com.google.common.reflect.TypeToken;
+
+/**
+ * All configurable parameters for Social Hash Partitioner.
+ *
+ * NOTE(review): generic type parameters on the ConfOption declarations
+ * appear to have been stripped from this patch text - restore before
+ * applying.
+ */
+public class SocialHashPartitionerSettings {
+  public static final ObjectInitializerConfOption
+      RECURSIVE = new ObjectInitializerConfOption<>(
+          "social_hash_partitioner.recursive", RecursiveSettings.class,
+          "", "Recursive settings to use");
+
+  public static final IntConfOption
+      NUM_ITERATIONS_PER_SPLIT = new IntConfOption(
+          "social_hash_partitioner.num_iterations_per_split", 50,
+          "Num iterations");
+
+  // Fixed: the description listed "AllTo0" and "MinHash", but the switch in
+  // SocialHashPartitionerBlockFactory.createBucketAssigner only accepts
+  // "Random" and "AllToSame" (MinHash/BFS are commented out); following the
+  // old help text would throw IllegalArgumentException.
+  public static final StrConfOption
+      ASSIGN_WITH = new StrConfOption(
+          "social_hash_partitioner.assign_with", "Random",
+          "Which bucket assigner to use, when vertex has no initial bucket " +
+          "in input. Current choices are: Random, AllToSame");
+
+  public static final BooleanConfOption
+      KEEP_STANDALONE_VERTICES = new BooleanConfOption(
+          "social_hash_partitioner.keep_standalone_vertices", true, "");
+
+  public static final EnumConfOption
+      OPTIMIZE_FOR = new EnumConfOption<>(
+          "social_hash_partitioner.optimize_for",
+          SHPOptimizationGoals.class, SHPOptimizationGoals.FANOUT,
+          "Optimization goal (objective function) name. 
Choices are in " +
+          "SHPOptimizationGoals enum.");
+
+  public static final LambdaConfOption>
+      FANOUT_PROBABILITY = new LambdaConfOption<>(
+          "social_hash_partitioner.fanout.fanout_probability",
+          new TypeToken>() { },
+          "0.5f",
+          "smoothing factor used for fanout, between 0.0f and 1.0f",
+          "stage");
+
+  public static final BooleanConfOption
+      USE_FINAL_P_FANOUT = new BooleanConfOption(
+          "social_hash_partitioner.fanout.use_final_p_fanout", true,
+          "");
+
+  public static final FloatConfOption
+      ALLOWED_IMBALANCE = new FloatConfOption(
+          "social_hash_partitioner.allowed_imbalance", 0.0f,
+          "allowed imbalance between buckets, between 0.0f and 1.0f");
+
+  // Convergence thresholds; presumably iteration stops when the fraction of
+  // moves / objective improvement drops below these - TODO confirm in the
+  // decide/move pieces.
+  public static final FloatConfOption
+      CONVERGE_MOVE_THRESHOLD = new FloatConfOption(
+          "social_hash_partitioner.converge.move_threshold", 0.0001f, "");
+
+  public static final FloatConfOption
+      CONVERGE_OBJECTIVE_THRESHOLD = new FloatConfOption(
+          "social_hash_partitioner.converge.objective_threshold", 0f, "");
+
+  public static final FloatConfOption
+      MOVE_PROBABILITY = new FloatConfOption(
+          "social_hash_partitioner.decide.move_probability", 0.8f, "");
+
+  public static final FloatConfOption
+      MAX_MOVE_RATIO = new FloatConfOption(
+          "social_hash_partitioner.decide.max_move_ratio", 0.1f, "");
+
+  public static final IntConfOption
+      GAIN_HISTOGRAM_NUM_QUARTER_BINS = new IntConfOption(
+          "social_hash_partitioner.decide.gain_histogram_num_quarter_bins", 30,
+          "");
+
+  // Default is 2^0.5 (sqrt of 2).
+  public static final FloatConfOption
+      GAIN_HISTOGRAM_EXPONENT = new FloatConfOption(
+          "social_hash_partitioner.decide.gain_histogram_exponent",
+          (float) Math.pow(2, 0.5), "");
+
+  public static final FloatConfOption
+      IGNORE_MOVE_GAIN_THRESHOLD = new FloatConfOption(
+          "social_hash_partitioner.move.ignore_gain_threshold", 1e-8f, "");
+
+  public static final FloatConfOption
+      SAMPLING_MOVE_GAIN_THRESHOLD = new FloatConfOption(
+          "social_hash_partitioner.move.sampling_gain_threshold", 1e-3f, "");
+
+  public static final float SAMPLING_MOVE_GAIN_RATIO = 0.25f;
+
+  public static final boolean IS_DEBUG = false;
+
+  // Utility class - not instantiable.
+  private SocialHashPartitionerSettings() { }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/AllToSameBucketAssigner.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/AllToSameBucketAssigner.java
new file mode 100644
index 000000000..6ad7320fb
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/AllToSameBucketAssigner.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.block_app.library.partitioning.assignment;
+
+/**
+ * Assigning all vertices to same bucket - first possible. 
+ *
+ * @param Vertex value type
+ */
+public class AllToSameBucketAssigner implements BucketAssigner {
+  @Override
+  public int getAssignedBucket(
+      V vertexValue, int beginBucket, int endBucket, long numVertices) {
+    // Always the first bucket of the allowed [beginBucket, endBucket) range.
+    return beginBucket;
+  }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/BucketAssigner.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/BucketAssigner.java
new file mode 100644
index 000000000..a1202be9f
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/BucketAssigner.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.block_app.library.partitioning.assignment;
+
+import java.io.Serializable;
+
+/**
+ * Abstraction of assigning initial bucket.
+ * Serializable because assigners are captured by distributed pieces.
+ *
+ * @param Vertex value type
+ */
+public interface BucketAssigner extends Serializable {
+  /**
+   * Calculate which bucket to assign to.
+   * beginBucket is inclusive; endBucket is exclusive (see
+   * RandomBucketAssigner, which draws from [beginBucket, endBucket)).
+   * numVertices is the count of vertices being assigned.
+   */
+  int getAssignedBucket(
+      V vertexValue, int beginBucket, int endBucket, long numVertices);
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/RandomBucketAssigner.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/RandomBucketAssigner.java
new file mode 100644
index 000000000..10cdd9891
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/RandomBucketAssigner.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.giraph.block_app.library.partitioning.assignment;
+
+import org.apache.giraph.writable.kryo.TransientRandom;
+
+/**
+ * Assigning each vertex to a random bucket, out of all buckets.
+ *
+ * @param Vertex value type
+ */
+public class RandomBucketAssigner implements BucketAssigner {
+  // NOTE(review): name suggests the random state is not serialized and is
+  // re-created after deserialization - confirm TransientRandom semantics.
+  private final TransientRandom rand = new TransientRandom();
+
+  @Override
+  public int getAssignedBucket(
+      V vertexValue, int beginBucket, int endBucket, long numVertices) {
+    // Uniform draw over [beginBucket, endBucket).
+    return beginBucket + rand.nextInt(endBucket - beginBucket);
+  }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/SHPInitializePiece.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/SHPInitializePiece.java
new file mode 100644
index 000000000..75f4557b1
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/SHPInitializePiece.java
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */
+package org.apache.giraph.block_app.library.partitioning.assignment;
+
+import org.apache.giraph.block_app.framework.api.BlockWorkerSendApi;
+import org.apache.giraph.block_app.framework.block.Block;
+import org.apache.giraph.block_app.framework.block.SequenceBlock;
+import org.apache.giraph.block_app.framework.piece.Piece;
+import org.apache.giraph.block_app.framework.piece.interfaces.VertexSender;
+import org.apache.giraph.block_app.library.Pieces;
+import org.apache.giraph.block_app.library.partitioning.SHPExecutionStage;
+import org.apache.giraph.block_app.library.partitioning.SocialHashPartitionerSettings;
+import org.apache.giraph.block_app.library.partitioning.recursive.RecursiveSettings;
+import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue;
+import org.apache.giraph.function.primitive.PrimitiveRefs;
+import org.apache.giraph.function.primitive.func.Int2IntFunction;
+import org.apache.giraph.reducers.impl.SumReduce;
+import org.apache.giraph.types.NoMessage;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Piece that initializes vertices, ignoring those that have initial bucket
+ * in the input. Updates candidate and initial bucket on a vertex to
+ * appropriate bucket.
+ *
+ * @param Vertex value type
+ */
+public class SHPInitializePiece
+    extends Piece {
+
+  /** Number of buckets available for initial assignment. */
+  private final int numInitialBuckets;
+  /** Strategy used when a vertex has no bucket from input/previous level. */
+  private final BucketAssigner bucketAssigner;
+  /** Filled by the preceding reduce piece with the vertex count. */
+  private final PrimitiveRefs.LongRef numVerticesHolder;
+
+  private SHPInitializePiece(
+      int numInitialBuckets,
+      BucketAssigner bucketAssigner,
+      PrimitiveRefs.LongRef numVerticesHolder
+  ) {
+    this.numInitialBuckets = numInitialBuckets;
+    this.bucketAssigner = bucketAssigner;
+    this.numVerticesHolder = numVerticesHolder;
+  }
+
+  /**
+   * Builds the initialize block: first a sum reduction counting vertices
+   * flagged for bucket assignment (the count feeds the assigner), then the
+   * initialize piece itself.
+   */
+  public static Block create(
+      int initialNumBuckets,
+      BucketAssigner bucketAssigner
+  ) {
+    LongWritable zero = new LongWritable(0);
+    LongWritable one = new LongWritable(1);
+    PrimitiveRefs.LongRef numVerticesHolder = new PrimitiveRefs.LongRef(0);
+    Piece reduceNumVerticesPiece = Pieces.reduce(
+        "ReduceNumVerticesPiece",
+        SumReduce.LONG,
+        (vertex) -> {
+          return ((V) vertex.getValue()).getShouldVertexBeAssignedBucket() ?
+              one : zero;
+        },
+        (result) -> {
+          numVerticesHolder.value = result.get();
+        }
+    );
+
+    return new SequenceBlock(
+        reduceNumVerticesPiece,
+        new SHPInitializePiece<>(
+            initialNumBuckets,
+            bucketAssigner,
+            numVerticesHolder
+        )
+    );
+  }
+
+  @Override
+  public VertexSender getVertexSender(
+      BlockWorkerSendApi workerApi,
+      SHPExecutionStage executionStage
+  ) {
+    RecursiveSettings recSettings =
+        SocialHashPartitionerSettings.RECURSIVE.createObject(
+            workerApi.getConf());
+    Int2IntFunction toBucketAtLevel0 =
+        recSettings.getPreviousBucketToBucketAtLevel(0);
+
+    return (vertex) -> {
+      V vertexValue = vertex.getValue();
+      if (!vertexValue.getShouldVertexBeAssignedBucket()) {
+        return;
+      }
+      final int notSet = SocialHashPartitionerVertexValue.BUCKET_NOT_SET;
+      // Already initialized from input: just validate range and skip.
+      // NOTE(review): if only one of current/candidate is set, the range
+      // check on the other will fail - presumably they are always set
+      // together; confirm.
+      if (vertexValue.getCurrentBucket() != notSet ||
+          vertexValue.getCandidateBucket() != notSet) {
+        Preconditions.checkState(
+            0 <= vertexValue.getCurrentBucket() &&
+            vertexValue.getCurrentBucket() < numInitialBuckets);
+        Preconditions.checkState(
+            0 <= vertexValue.getCandidateBucket() &&
+            vertexValue.getCandidateBucket() < numInitialBuckets);
+        return;
+      }
+
+      // Map the bucket from the previous recursion level, if any.
+      if (toBucketAtLevel0 != null) {
+        int previousBucket = vertexValue.getPreviousLastLevelBucket();
+        if (previousBucket != notSet) {
+          int initialBucket = toBucketAtLevel0.apply(previousBucket);
+          Preconditions.checkState(
+              0 <= initialBucket && initialBucket < numInitialBuckets,
+              "Incorrect initialization of initialBucket - trying to assign " +
+              "vertex: " + vertex.getId().get() + " a bucket of : " +
+              initialBucket + " out of " + numInitialBuckets + " buckets"
+          );
+          vertexValue.setInitialBucket(initialBucket, true);
+        }
+      }
+
+      // Assign initial partitions, if not specified in the input
+      if (vertexValue.getInitialBucket() == notSet) {
+        int assignedBucket = bucketAssigner.getAssignedBucket(
+            vertexValue,
+            0,
+            numInitialBuckets,
+            numVerticesHolder.value
+        );
+        Preconditions.checkState(
+            assignedBucket != notSet,
+            "Couldn't assign appropriate bucket to vertex " +
+            vertex.getId().get()
+        );
+        vertexValue.setInitialBucket(assignedBucket, false);
+      }
+
+      vertexValue.setCandidateBucket(vertexValue.getInitialBucket());
+    };
+  }
+
+  @Override
+  public SHPExecutionStage nextExecutionStage(
+      SHPExecutionStage executionStage) {
+    // Reset counters and record the bucket count for the following pieces.
+    return executionStage.changedIteration(0).changedSplits(0)
+        .changedNumBuckets(numInitialBuckets);
+  }
+}
diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/package-info.java
new file mode 100644
index 000000000..9ec047dd5
--- /dev/null
+++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/assignment/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Abstraction of assigning initial partitions + */ +package org.apache.giraph.block_app.library.partitioning.assignment; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/confs/SHPPaperConfs.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/confs/SHPPaperConfs.java new file mode 100644 index 000000000..cd0e8c8d5 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/confs/SHPPaperConfs.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.confs; + +import org.apache.giraph.block_app.framework.BlockUtils; +import org.apache.giraph.block_app.library.partitioning.SocialHashPartitionerBlockFactory; +import org.apache.giraph.block_app.library.partitioning.SocialHashPartitionerSettings; +import org.apache.giraph.conf.BulkConfigurator; +import org.apache.giraph.conf.GiraphConfiguration; + +/** Configuration used for experiments in the corresponding paper */ +public class SHPPaperConfs implements BulkConfigurator { + + @Override + public void configure(GiraphConfiguration conf) { + BlockUtils.setBlockFactoryClass( + conf, SocialHashPartitionerBlockFactory.class); + + SocialHashPartitionerSettings.MAX_MOVE_RATIO.setIfUnset(conf, 0.1f); + SocialHashPartitionerSettings.IGNORE_MOVE_GAIN_THRESHOLD.setIfUnset( + conf, 0); + SocialHashPartitionerSettings.SAMPLING_MOVE_GAIN_THRESHOLD.setIfUnset( + conf, 0.001f); + SocialHashPartitionerSettings.CONVERGE_OBJECTIVE_THRESHOLD.setIfUnset( + conf, 0); + SocialHashPartitionerSettings.CONVERGE_MOVE_THRESHOLD.setIfUnset(conf, 0); + SocialHashPartitionerSettings.ALLOWED_IMBALANCE.setIfUnset(conf, 0.045f); + SocialHashPartitionerSettings.GAIN_HISTOGRAM_EXPONENT.setIfUnset( + conf, (float) Math.pow(2, 0.5)); + SocialHashPartitionerSettings.USE_FINAL_P_FANOUT.setIfUnset(conf, true); + SocialHashPartitionerSettings.MOVE_PROBABILITY.setIfUnset(conf, 0.8f); + + SocialHashPartitionerSettings.NUM_ITERATIONS_PER_SPLIT.setIfUnset( + conf, 20); + SocialHashPartitionerSettings.FANOUT_PROBABILITY.setCodeSnippetIfUnset( + conf, "0.5f"); + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/confs/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/confs/package-info.java new file mode 100644 index 000000000..3c36fbdb3 --- 
/dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/confs/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Common configurations + */ +package org.apache.giraph.block_app.library.partitioning.confs; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDesc.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDesc.java new file mode 100644 index 000000000..b8ce6157d --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDesc.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.decide; + +import org.apache.giraph.function.primitive.func.Byte2LongFunction; + +/** Histogram description */ +public interface HistogramDesc { + byte toBin(float value); + float smallestValue(byte bin); + + byte largestIndex(); + byte smallestIndex(); + + void computeWhatToSwap( + int[] bucket, long[] bucketSize, float allowedSize, + float moveProbability, float maxMoveRatio, + Byte2LongFunction[] getMoveCandidates, + byte[] bins, float[] probs); +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDescImpl.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDescImpl.java new file mode 100644 index 000000000..7e49dbc4e --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDescImpl.java @@ -0,0 +1,364 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
package org.apache.giraph.block_app.library.partitioning.decide;

import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

import org.apache.giraph.block_app.library.partitioning.SocialHashPartitionerSettings;
import org.apache.giraph.function.primitive.IntConsumer;
import org.apache.giraph.function.primitive.func.Byte2LongFunction;
import org.apache.giraph.function.primitive.func.Double2DoubleFunction;
import org.apache.log4j.Logger;

import com.google.common.base.Preconditions;

/**
 * Histogram description implementation.
 *
 * Bins are delimited by {@code binThresholds}: geometrically growing
 * positive thresholds mirrored (scaled by {@link #NEG_SYM_OFFSET}) on the
 * negative side, with zero in the middle.
 */
public class HistogramDescImpl implements HistogramDesc {
  /**
   * Scale applied to mirrored negative thresholds so that a positive gain
   * and the corresponding negative gain never cancel to exactly zero.
   */
  protected static final float NEG_SYM_OFFSET = 0.999f;

  private static final Logger LOG = Logger.getLogger(HistogramDescImpl.class);

  /** Sorted bin edges; bin i covers [binThresholds[i-1], binThresholds[i]) */
  private final float[] binThresholds;

  public HistogramDescImpl(float[] binThresholds) {
    // Bin indices are stored in bytes; leave headroom for largestIndex()+1.
    Preconditions.checkState(binThresholds.length < Byte.MAX_VALUE - 1);
    this.binThresholds = binThresholds;
  }

  /**
   * Create a histogram with {@code 4 * quarterBins} geometric bins.
   *
   * @param quarterBins quarter of the total bin count
   * @param exponent geometric growth factor between adjacent thresholds
   */
  public static HistogramDescImpl create(int quarterBins, float exponent) {
    int middle = quarterBins * 2;

    float[] binThresholds = new float[4 * quarterBins + 1];

    binThresholds[middle] = 0;
    for (int i = 1; i <= 2 * quarterBins; i++) {
      binThresholds[middle + i] = (float) Math.pow(exponent, i - quarterBins);
      binThresholds[middle - i] = - NEG_SYM_OFFSET * binThresholds[middle + i];
    }
    // Make the first positive threshold tiny, so any strictly positive
    // gain lands above the zero bin.
    binThresholds[middle + 1] *= 1e-10;
    return new HistogramDescImpl(binThresholds);
  }

  @Override
  public byte toBin(float value) {
    int bin = Arrays.binarySearch(binThresholds, value);
    // Binary search returns -insertion point - 1 if the value is not found.
    if (bin < 0) {
      return (byte) (- bin - 1);
    } else {
      // boundaries are in the higher bucket
      return (byte) (bin + 1);
    }
  }

  @Override
  public float smallestValue(byte bin) {
    return bin > 0 ? binThresholds[bin - 1] : Float.NEGATIVE_INFINITY;
  }

  @Override
  public byte largestIndex() {
    return (byte) (binThresholds.length);
  }

  @Override
  public byte smallestIndex() {
    return 0;
  }

  @Override
  public void computeWhatToSwap(
      int[] bucket, long[] bucketSize, float allowedSize,
      float moveProbability, float maxMoveRatio,
      Byte2LongFunction[] getMoveCandidates,
      byte[] bins, float[] probs) {
    int left = 0;
    int right = 1;
    // Never allow below half the pair's total — the pair must fit.
    long roundedAllowedSize = Math.max(
        (long) allowedSize - 2,
        (bucketSize[left] + bucketSize[right] + 1) / 2);

    byte[] lastBin = new byte[2];
    float[] lastBinProb = new float[2];

    Byte2LongFunction[] newGetMoveCandidates =
        Arrays.copyOf(getMoveCandidates, 2);

    // Cap the number of candidates per side at maxMove by truncating the
    // histogram: bins above lastBin move fully, lastBin moves partially.
    if (maxMoveRatio < 1) {
      long maxMove = (long) (roundedAllowedSize * maxMoveRatio);
      for (int side = 0; side < 2; side++) {
        long sum = 0;
        long[] moveCandidates = new long[largestIndex() + 1];
        for (byte bin = largestIndex(); bin >= smallestIndex(); bin--) {
          moveCandidates[bin] = getMoveCandidates[side].apply(bin);
          if (sum + moveCandidates[bin] >= maxMove) {
            lastBin[side] = bin;
            lastBinProb[side] =
                ((float) (maxMove - sum)) / moveCandidates[bin];
            Preconditions.checkState(
                !Float.isNaN(lastBinProb[side]) && lastBinProb[side] <= 1.0 &&
                lastBinProb[side] >= 0.0);
            moveCandidates[bin] = maxMove - sum;
            break;
          }
          sum += moveCandidates[bin];
        }
        Preconditions.checkState(
            LongStream.of(moveCandidates).sum() <= maxMove);
        newGetMoveCandidates[side] = (bin) -> moveCandidates[bin];
      }
    }

    computeWhatToSwapImpl(
        bucket, bucketSize, roundedAllowedSize, moveProbability,
        newGetMoveCandidates, bins, probs);

    if (maxMoveRatio < 1) {
      for (int side = 0; side < 2; side++) {
        // FIX: was "if (bins[...] < lastBin[side]) { checkState(bins[...]
        // >= lastBin[side]); }" — the guard is the exact negation of the
        // checked condition, so it is equivalent to this unconditional
        // check, just stated confusingly.
        Preconditions.checkState(bins[bucket[side]] >= lastBin[side]);
        if (bins[bucket[side]] == lastBin[side]) {
          // Partial bin: combine decision probability with the cap's
          // truncation probability.
          probs[bucket[side]] *= lastBinProb[side];
          Preconditions.checkState(!Float.isNaN(probs[bucket[side]]));
        }
      }
    }

    // Following are just consistency checks
    for (int side = 0; side < 2; side++) {
      Preconditions.checkState(bins[bucket[side]] > largestIndex() ||
          newGetMoveCandidates[side].apply(bins[bucket[side]]) > 0);
    }

    // Recompute the expected flow in both directions and verify the
    // resulting sizes stay within the (imprecision-padded) allowance.
    double[] movingTo = new double[2];
    for (int side = 0; side < 2; side++) {
      for (byte bin = bins[bucket[side]]; bin <= largestIndex(); bin++) {
        long candidates = getMoveCandidates[side].apply(bin);
        if (bin == bins[bucket[side]]) {
          movingTo[side] += candidates * probs[bucket[side]];
        } else {
          movingTo[side] += candidates;
        }
      }
    }
    double[] curSize = new double[2];
    curSize[left] = bucketSize[left] + movingTo[left] - movingTo[right];
    curSize[right] = bucketSize[right] + movingTo[right] - movingTo[left];

    double[] curSizeProb = new double[2];
    curSizeProb[left] =
        bucketSize[left] +
        (movingTo[left] - movingTo[right]) * moveProbability;
    curSizeProb[right] =
        bucketSize[right] +
        (movingTo[right] - movingTo[left]) * moveProbability;

    boolean shouldLog =
        SocialHashPartitionerSettings.IS_DEBUG || bucket[right] < 4;
    if (shouldLog) {
      LOG.info("Rounded allowed size " + roundedAllowedSize + ", buckets: " +
          Arrays.toString(bucket) + ", sizes: " + Arrays.toString(bucketSize) +
          ", expected: " + Arrays.toString(curSize) +
          ", expected with mov_prob: " + Arrays.toString(curSizeProb) +
          ", moving to: " + Arrays.toString(movingTo));
    }

    Preconditions.checkState(
        roundedAllowedSize + 0.01 >= curSize[left] ||
        roundedAllowedSize + 0.01 >= curSize[right]);

    // If both buckets already fit, expectation must stay in bounds without
    // the probability discount; otherwise use the discounted size.
    double[] checkSize =
        (bucketSize[left] <= roundedAllowedSize &&
         bucketSize[right] <= roundedAllowedSize) ?
            curSize : curSizeProb;

    Double2DoubleFunction imprecision = (val) -> val * 1.00001 + 0.01;
    for (int side = 0; side < 2; side++) {
      if (imprecision.apply(roundedAllowedSize) < checkSize[side]) {
        if (checkSize[side] > imprecision.apply(bucketSize[side])) {
          Preconditions.checkState(
              checkSize[side] <= bucketSize[side] + 0.1,
              Arrays.toString(curSize) + "\n" +
              Arrays.toString(bucket) + "\n" +
              Arrays.toString(bucketSize) + "\n" +
              allowedSize + " " + moveProbability + " " + maxMoveRatio + "\n" +
              IntStream.range(smallestIndex(), largestIndex() + 1)
                  .mapToObj(i -> "" + getMoveCandidates[left].apply((byte) i))
                  .collect(Collectors.joining(", ")) + "\n" +
              IntStream.range(smallestIndex(), largestIndex() + 1)
                  .mapToObj(i -> "" + getMoveCandidates[right].apply((byte) i))
                  .collect(Collectors.joining(", ")) + "\n" +
              Arrays.toString(bins) + "\n" + Arrays.toString(probs));
        }
      }
    }
  }

  /**
   * Core decision loop: greedily consume histogram bins from the highest
   * gain downwards, alternating sides to keep sizes within
   * {@code roundedAllowedSize}, and stop when gains stop being positive.
   */
  private void computeWhatToSwapImpl(
      int[] bucket, long[] bucketSize, long roundedAllowedSize,
      float moveProbability, Byte2LongFunction[] getMoveCandidates,
      byte[] bins, float[] probs) {
    int left = 0;
    int right = 1;

    if (bucketSize[left] <= roundedAllowedSize &&
        bucketSize[right] <= roundedAllowedSize) {
      // if we are under limits, exclude probability, to keep expectation
      // within it, but use it if we are not - as balancing otherwise
      // wouldn't be enough.
      moveProbability = 1;
    }

    byte[] nextIndex = new byte[2];
    byte[] lastUsedIndex = new byte[2];
    long[] movingTo = new long[2];
    double[] curSize = new double[2];
    for (int side = 0; side < 2; side++) {
      nextIndex[side] = largestIndex();
      lastUsedIndex[side] = (byte) (largestIndex() + 1);
    }

    // Commit all bins picked so far for a side: everything from
    // lastUsedIndex upward moves with probability 1.
    IntConsumer fillMoveAllPicked = side -> {
      bins[bucket[side]] = lastUsedIndex[side];
      Preconditions.checkState(bins[bucket[side]] > largestIndex() ||
          getMoveCandidates[side].apply(bins[bucket[side]]) > 0);
      probs[bucket[side]] = 1;
    };

    while (true) {
      int to;
      int from;
      curSize[left] = bucketSize[left] +
          (movingTo[left] - movingTo[right]) * moveProbability;
      curSize[right] = bucketSize[right] +
          (movingTo[right] - movingTo[left]) * moveProbability;

      Preconditions.checkState(
          roundedAllowedSize >= curSize[left] ||
          roundedAllowedSize >= curSize[right]);

      if (roundedAllowedSize < curSize[left] ||
          roundedAllowedSize < curSize[right]) {
        // if over imbalance, pick side to balance
        if (roundedAllowedSize < curSize[left]) {
          to = right;
          from = left;
        } else {
          to = left;
          from = right;
        }

        // if we moved too much, and gains are not positive any more, end
        // otherwise we will balance with negatives too
        if (lastUsedIndex[from] <= largestIndex() &&
            (nextIndex[to] < 0 ||
             (smallestValue((byte) (nextIndex[from] + 1)) +
              smallestValue(nextIndex[to]) <= 0))) {
          fillMoveAllPicked.apply(to);
          fillMoveAllPicked.apply(from);

          float candidates =
              getMoveCandidates[from].apply(lastUsedIndex[from]) *
              moveProbability;
          Preconditions.checkState(candidates > 0);
          double toLeave = curSize[from] - roundedAllowedSize;
          if (toLeave >= candidates) {
            // Must leave a whole bin (or more) behind: advance to the
            // next non-empty bin.
            bins[bucket[from]]++;
            while (bins[bucket[from]] <= largestIndex() &&
                getMoveCandidates[from].apply(bins[bucket[from]]) == 0) {
              bins[bucket[from]]++;
            }
            Preconditions.checkState(
                bins[bucket[from]] > largestIndex() ||
                getMoveCandidates[from].apply(bins[bucket[from]]) > 0);
          } else {
            // Leave a fraction of the last bin behind.
            probs[bucket[from]] = (float) (1 - toLeave / candidates);
            Preconditions.checkState(
                (!Float.isNaN(probs[bucket[from]])) &&
                (probs[bucket[from]] >= 0));
          }
          return;
        }
      } else {
        // if balanced enough, pick higher gain
        if (nextIndex[left] == nextIndex[right]) {
          // if gains equal, pick first going to the smaller bucket:
          if (curSize[left] < curSize[right]) {
            to = left;
            from = right;
          } else {
            to = right;
            from = left;
          }
        } else if (nextIndex[left] >= nextIndex[right]) {
          to = left;
          from = right;
        } else {
          to = right;
          from = left;
        }

        Preconditions.checkState(nextIndex[to] > 0);
        float nextMinGain = smallestValue(nextIndex[to]);

        // If best gain is negative, end
        if (nextMinGain < 0) {
          fillMoveAllPicked.apply(from);
          fillMoveAllPicked.apply(to);
          return;
        }

        // if best gain is zero, only balance and end
        if (nextMinGain == 0) {
          fillMoveAllPicked.apply(from);
          long candidates = getMoveCandidates[to].apply(nextIndex[to]);
          if (curSize[to] + 1 < curSize[from] && candidates > 0) {
            bins[bucket[to]] = nextIndex[to];
            Preconditions.checkState(
                bins[bucket[to]] > largestIndex() ||
                getMoveCandidates[to].apply(bins[bucket[to]]) > 0);
            // Move just enough of the zero-gain bin to equalize sizes.
            probs[bucket[to]] =
                ((float) Math.min(
                    candidates,
                    (curSize[from] - curSize[to]) / 2)) / candidates;
            Preconditions.checkState(!Float.isNaN(probs[bucket[to]]));
          } else {
            fillMoveAllPicked.apply(to);
          }
          return;
        }
      }

      if (nextIndex[to] < 0) {
        fillMoveAllPicked.apply(left);
        fillMoveAllPicked.apply(right);
        return;
      }
      long candidates = getMoveCandidates[to].apply(nextIndex[to]);
      if (candidates > 0) {
        lastUsedIndex[to] = nextIndex[to];
        movingTo[to] += candidates;
      }
      nextIndex[to]--;
    }
  }
}
100644 index 000000000..91100ec10 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/SHPDecideUtils.java @@ -0,0 +1,329 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.decide; + +import java.util.concurrent.ThreadLocalRandom; + +import org.apache.giraph.block_app.framework.api.BlockMasterApi; +import org.apache.giraph.block_app.framework.api.BlockWorkerReceiveApi; +import org.apache.giraph.block_app.framework.api.BlockWorkerSendApi; +import org.apache.giraph.block_app.framework.api.CreateReducersApi; +import org.apache.giraph.block_app.framework.block.Block; +import org.apache.giraph.block_app.framework.piece.Piece; +import org.apache.giraph.block_app.framework.piece.global_comm.BroadcastHandle; +import org.apache.giraph.block_app.framework.piece.global_comm.array.ReducerArrayHandle; +import org.apache.giraph.block_app.framework.piece.global_comm.map.ReducerMapHandle; +import org.apache.giraph.block_app.framework.piece.interfaces.VertexReceiver; +import org.apache.giraph.block_app.framework.piece.interfaces.VertexSender; +import org.apache.giraph.block_app.library.partitioning.SHPExecutionStage; 
+import org.apache.giraph.block_app.library.partitioning.SHPLoggingBuilder; +import org.apache.giraph.block_app.library.partitioning.SocialHashPartitionerSettings; +import org.apache.giraph.block_app.library.partitioning.recursive.MoveBucketLimiter; +import org.apache.giraph.block_app.library.partitioning.recursive.MoveBucketLimiterImpl; +import org.apache.giraph.block_app.library.partitioning.recursive.RecursiveSettings; +import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue; +import org.apache.giraph.block_app.reducers.array.ArrayOfHandles; +import org.apache.giraph.block_app.reducers.array.BasicArrayReduce; +import org.apache.giraph.block_app.reducers.map.BasicMapReduce; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.function.primitive.func.Byte2LongFunction; +import org.apache.giraph.function.primitive.func.Int2FloatFunction; +import org.apache.giraph.reducers.impl.SumReduce; +import org.apache.giraph.types.NoMessage; +import org.apache.giraph.types.ops.ByteTypeOps; +import org.apache.giraph.types.ops.LongTypeOps; +import org.apache.hadoop.io.ByteWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.log4j.Logger; + +import com.google.common.base.Preconditions; + +/** Utilities for deciding which vertices are going to move */ +public class SHPDecideUtils { + private static final Logger LOG = Logger.getLogger(SHPDecideUtils.class); + + private SHPDecideUtils() { } + + public static Block createDecideBlock( + GiraphConfiguration conf, RecursiveSettings recursive) { + HistogramDesc histogramDesc = HistogramDescImpl.create( + SocialHashPartitionerSettings.GAIN_HISTOGRAM_NUM_QUARTER_BINS.get(conf), + SocialHashPartitionerSettings.GAIN_HISTOGRAM_EXPONENT.get(conf)); + MoveBucketLimiter limiter = new MoveBucketLimiterImpl(); + + float moveProbability = + SocialHashPartitionerSettings.MOVE_PROBABILITY.get(conf); + 
Preconditions.checkState( + moveProbability <= 1 && moveProbability > 0); + float maxMoveRatio = + SocialHashPartitionerSettings.MAX_MOVE_RATIO.get(conf); + Preconditions.checkState(maxMoveRatio > 0); + float allowedImbalance = + SocialHashPartitionerSettings.ALLOWED_IMBALANCE.get(conf); + Preconditions.checkState(allowedImbalance >= 0); + + int numSplits = recursive.getNumSplits(); + Int2FloatFunction getAllowedImbalance; + if (recursive.runIterationsBeforeFirstSplit()) { + getAllowedImbalance = + (split) -> allowedImbalance * (1 + split) / (1 + numSplits); + } else { + getAllowedImbalance = (split) -> allowedImbalance * split / numSplits; + } + + return new DecideMovementPiece( + histogramDesc, limiter, moveProbability, maxMoveRatio, + getAllowedImbalance); + } + + /** Piece to decide which vertices are going to move */ + public static final class DecideMovementPiece extends + Piece { + private final HistogramDesc histogramDesc; + private final MoveBucketLimiter limiter; + private final float moveProbability; + private final float maxMoveRatio; + private final Int2FloatFunction getAllowedImbalance; + + private ReducerArrayHandle bucketSizes; + private + ArrayOfHandles> + gainHistograms; + private BroadcastHandle probsBroadcast; + private BroadcastHandle binsBroadcast; + + public DecideMovementPiece( + HistogramDesc histogramDesc, MoveBucketLimiter limiter, + float moveProbability, float maxMoveRatio, + Int2FloatFunction getAllowedImbalance) { + this.histogramDesc = histogramDesc; + this.limiter = limiter; + this.moveProbability = moveProbability; + this.maxMoveRatio = maxMoveRatio; + this.getAllowedImbalance = getAllowedImbalance; + } + + @Override + public void registerReducers( + CreateReducersApi reduceApi, SHPExecutionStage executionStage) { + bucketSizes = BasicArrayReduce.createLocalArrayHandles( + executionStage.getNumBuckets(), + LongTypeOps.INSTANCE, SumReduce.LONG, reduceApi); + + gainHistograms = new ArrayOfHandles<>( + executionStage.getNumBuckets(), 
+ () -> BasicMapReduce.createLocalMapHandles( + ByteTypeOps.INSTANCE, LongTypeOps.INSTANCE, SumReduce.LONG, + reduceApi)); + } + + @Override + public + VertexSender + getVertexSender( + BlockWorkerSendApi workerApi, + SHPExecutionStage executionStage) { + ByteWritable bin = new ByteWritable(); + return (vertex) -> { + if (!vertex.getValue().getShouldVertexBeAssignedBucket()) { + return; + } + Preconditions.checkState( + vertex.getValue().getCandidateBucket() == + vertex.getValue().getCurrentBucket()); + int bucket = vertex.getValue().getCandidateBucket(); + float gain = vertex.getValue().getBisectionMoveGain(); + + reduceLong(bucketSizes.get(bucket), 1); + bin.set(histogramDesc.toBin(gain)); + + if (SocialHashPartitionerSettings.IS_DEBUG) { + LOG.info(String.format( + "Vertex %d with gains %f assigned to bin %d, which is from %f", + vertex.getId().get(), gain, bin.get(), + histogramDesc.smallestValue(bin.get()))); + } + reduceLong( + gainHistograms.get(limiter.otherBucket(bucket)).get(bin), 1); + }; + } + + @Override + public void masterCompute( + BlockMasterApi master, SHPExecutionStage executionStage) { + long[] globalBucketSizes = new long[executionStage.getNumBuckets()]; + long totalSize = logImbalanceAndComputeTotalSize( + master, executionStage, globalBucketSizes); + + final int left = 0; + final int right = 1; + + float allowedSize = + ((float) totalSize) / executionStage.getNumBuckets(); + float allowedImbalance = + getAllowedImbalance.apply(executionStage.getSplits()); + if (allowedImbalance > 0) { + allowedSize *= 1 + allowedImbalance; + } + + int[] bucket = new int[2]; + long[] bucketSize = new long[2]; + + float[] probs = new float[executionStage.getNumBuckets()]; + byte[] bins = new byte[executionStage.getNumBuckets()]; + + ReducerMapHandle[] histogram = + new ReducerMapHandle[2]; + + Byte2LongFunction[] getMoveCandidates = new Byte2LongFunction[] { + index -> getMappedByte( + histogram[left], index).getReducedValue(master).get(), + index -> 
getMappedByte( + histogram[right], index).getReducedValue(master).get() + }; + + for (bucket[left] = 0; bucket[left] < executionStage.getNumBuckets(); + bucket[left] += 2) { + Preconditions.checkState( + bucket[left] == limiter.startBucket(bucket[left])); + bucket[right] = limiter.endBucket(bucket[left]); + + boolean shouldLog = + SocialHashPartitionerSettings.IS_DEBUG || bucket[right] < 4; + + for (int side = 0; side < 2; side++) { + bucketSize[side] = globalBucketSizes[bucket[side]]; + + histogram[side] = gainHistograms.get(bucket[side]); + + if (shouldLog) { + StringBuilder sb = new StringBuilder( + "Gains to bucket " + bucket[side] + ": "); + for (byte bin = histogramDesc.smallestIndex(); + bin <= histogramDesc.largestIndex(); bin++) { + sb.append(String.format( + "%4.3f+ %3d ", + histogramDesc.smallestValue(bin), + getMoveCandidates[side].apply(bin))); + } + LOG.info(sb); + } + } + + histogramDesc.computeWhatToSwap( + bucket, bucketSize, allowedSize, moveProbability, maxMoveRatio, + getMoveCandidates, bins, probs); + + if (shouldLog) { + for (int side = 0; side < 2; side++) { + byte bin = bins[bucket[side]]; + LOG.info(String.format( + "Decided to move to %d: from bin %d (>=%f) with prob %f", + bucket[side], bin, + bin <= histogramDesc.largestIndex() ? 
+ histogramDesc.smallestValue(bin) : + Float.POSITIVE_INFINITY, probs[bucket[side]])); + } + } + } + probsBroadcast = master.broadcast(probs); + binsBroadcast = master.broadcast(bins); + } + + private long logImbalanceAndComputeTotalSize( + BlockMasterApi master, SHPExecutionStage executionStage, + long[] globalBucketSizes) { + long totalSize = 0; + + long maxBucketSize = 0; + long maxBucketSizeIndex = 0; + long sumBucketSize = 0; + + for (int i = 0; i < executionStage.getNumBuckets(); i++) { + globalBucketSizes[i] = + bucketSizes.get(i).getReducedValue(master).get(); + totalSize += globalBucketSizes[i]; + + if (maxBucketSize < globalBucketSizes[i]) { + maxBucketSize = globalBucketSizes[i]; + maxBucketSizeIndex = i; + } + maxBucketSize = Math.max(maxBucketSize, globalBucketSizes[i]); + sumBucketSize += globalBucketSizes[i]; + } + + float imbalance = maxBucketSize / + (((float) sumBucketSize) / executionStage.getNumBuckets()); + SHPLoggingBuilder.setCounter( + "imbalance", imbalance, master, executionStage); + + new SHPLoggingBuilder() + .appendLine( + "Current imbalance [" + maxBucketSizeIndex + "]: " + imbalance + + " = " + maxBucketSize + " / " + sumBucketSize + " / " + + executionStage.getNumBuckets()) + .logToCommandLine(master); + return totalSize; + } + + @Override + public + VertexReceiver + getVertexReceiver(BlockWorkerReceiveApi workerApi, + SHPExecutionStage executionStage) { + return (vertex, messages) -> { + if (!vertex.getValue().getShouldVertexBeAssignedBucket()) { + return; + } + + float gain = vertex.getValue().getBisectionMoveGain(); + byte bin = histogramDesc.toBin(gain); + + int targetBucket = + limiter.otherBucket(vertex.getValue().getCandidateBucket()); + + byte moveBin = binsBroadcast.getBroadcast(workerApi)[targetBucket]; + float prob; + if (moveBin < bin) { + prob = 1; + } else if (moveBin == bin) { + prob = probsBroadcast.getBroadcast(workerApi)[targetBucket]; + } else { + prob = 0; + } + prob *= moveProbability; + + if (0 < prob && + (1 <= 
prob || ThreadLocalRandom.current().nextFloat() < prob)) { + if (SocialHashPartitionerSettings.IS_DEBUG) { + LOG.info( + "Moving " + vertex.getId() + " from " + + vertex.getValue().getCandidateBucket() + " to " + + targetBucket + " with gain " + gain); + } + vertex.getValue().setCandidateBucket(targetBucket); + } + }; + } + } + +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/package-info.java new file mode 100644 index 000000000..f2be47545 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/decide/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +/** + * Decide pieces and utilities + */ +package org.apache.giraph.block_app.library.partitioning.decide; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPFanoutOptimizationGoal.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPFanoutOptimizationGoal.java new file mode 100644 index 000000000..3d1dc4d40 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPFanoutOptimizationGoal.java @@ -0,0 +1,602 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.block_app.library.partitioning.goal; + +import java.util.Iterator; +import java.util.concurrent.ThreadLocalRandom; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import org.apache.giraph.block_app.framework.api.BlockMasterApi; +import org.apache.giraph.block_app.framework.api.BlockWorkerReceiveApi; +import org.apache.giraph.block_app.framework.api.BlockWorkerSendApi; +import org.apache.giraph.block_app.framework.api.CreateReducersApi; +import org.apache.giraph.block_app.framework.block.Block; +import org.apache.giraph.block_app.framework.block.SequenceBlock; +import org.apache.giraph.block_app.framework.piece.Piece; +import org.apache.giraph.block_app.framework.piece.global_comm.ReducerHandle; +import org.apache.giraph.block_app.framework.piece.global_comm.array.ReducerArrayHandle; +import org.apache.giraph.block_app.framework.piece.interfaces.VertexReceiver; +import org.apache.giraph.block_app.framework.piece.interfaces.VertexSender; +import org.apache.giraph.block_app.library.partitioning.SHPExecutionStage; +import org.apache.giraph.block_app.library.partitioning.SHPLoggingBuilder; +import org.apache.giraph.block_app.library.partitioning.SocialHashPartitionerSettings; +import org.apache.giraph.block_app.library.partitioning.goal.cache.CachedGeneratedArray; +import org.apache.giraph.block_app.library.partitioning.recursive.MoveBucketLimiter; +import org.apache.giraph.block_app.library.partitioning.recursive.MoveBucketLimiterImpl; +import org.apache.giraph.block_app.library.partitioning.recursive.RecursiveSettings; +import org.apache.giraph.block_app.library.partitioning.vertex.CachedNeighborData; +import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue; +import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValueImpl; +import org.apache.giraph.block_app.reducers.array.BasicArrayReduce; +import 
org.apache.giraph.combiner.MessageCombiner; +import org.apache.giraph.combiner.SumMessageCombiner; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.function.Consumer; +import org.apache.giraph.function.ObjectTransfer; +import org.apache.giraph.function.Supplier; +import org.apache.giraph.function.primitive.Obj2FloatFunction; +import org.apache.giraph.reducers.impl.SumReduce; +import org.apache.giraph.types.ops.LongTypeOps; +import org.apache.giraph.types.ops.collections.ResettableIterator; +import org.apache.giraph.types.ops.collections.array.WLongArrayList; +import org.apache.hadoop.io.DoubleWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.log4j.Logger; + +import com.google.common.base.Preconditions; + +/** + * Fanout optimization goal + */ +public class SHPFanoutOptimizationGoal implements SHPOptimizationGoal { + private static final Logger LOG = + Logger.getLogger(SHPFanoutOptimizationGoal.class); + + /** Formula for computing goal */ + private interface GoalFormula { + double comparisonEval(int neighbors, int totalNeighbors); + double toObjective(double comparisonEval); + float scaleThreshold(float threshold); + } + + /** + * Specialization of goal formulas for when p->0. + * This is equivalent to clique-net objective. + */ + private static class GoalFanoutP0 implements GoalFormula { + @Override + public double comparisonEval(int n, int t) { + return n * n * (t > 1 ? (1.0 / (t - 1)) : 1); + } + + @Override + public double toObjective(double comparisonEval) { + return comparisonEval; + } + + @Override + public float scaleThreshold(float threshold) { + return threshold; + } + } + + /** + * Specialization of goal formulas when p=1. + * This is equivalent to fanout objective. 
+ */ + private static class GoalFanoutP1 implements GoalFormula { + @Override + public double comparisonEval(int n, int t) { + return n > 0 ? - 1 : 0; + } + + @Override + public double toObjective(double comparisonEval) { + return - comparisonEval; + } + + @Override + public float scaleThreshold(float threshold) { + return threshold; + } + } + + /** + * Specialization of goal formulas when p in (0, 1). + * This represents p-fanout objective. + */ + private static class GoalFanoutP implements GoalFormula { + private final float fanoutProbability; + private final int splitInto; + private final CachedGeneratedArray exponentCache; + + public GoalFanoutP(float fanoutProbability, int splitInto) { + fanoutProbability /= splitInto; + this.fanoutProbability = fanoutProbability; + this.splitInto = splitInto; + exponentCache = new CachedGeneratedArray( + 16000, n -> Math.pow(1 - this.fanoutProbability, n)); + } + + @Override + public double comparisonEval(int n, int t) { + return splitInto * exponentCache.apply(n); + } + + @Override + public double toObjective(double comparisonEval) { + return splitInto - comparisonEval; + } + + @Override + public float scaleThreshold(float threshold) { + return threshold; + } + } + + private static GoalFormula createFormula( + float fanoutProbability, int splitInto) { + if (fanoutProbability == 0) { + return new GoalFanoutP0(); + } else if (fanoutProbability == 1 && splitInto == 1) { + return new GoalFanoutP1(); + } else { + return new GoalFanoutP(fanoutProbability, splitInto); + } + } + + @Override + public Block createMoveBlock( + GiraphConfiguration conf, Consumer converged) { + RecursiveSettings recursive = + SocialHashPartitionerSettings.RECURSIVE.createObject(conf); + + MoveBucketLimiter bucketLimiter = new MoveBucketLimiterImpl(); + ObjectTransfer> transferMessages = + new ObjectTransfer<>(); + + return new SequenceBlock( + new SendFromDataToQueryVertices( + transferMessages, converged, + 
SocialHashPartitionerSettings.CONVERGE_MOVE_THRESHOLD.get(conf)), + new SendFromQueryToDataVertices( + transferMessages, converged, + SocialHashPartitionerSettings.USE_FINAL_P_FANOUT.get(conf) ? + recursive.getFinalNumBuckets() : -1, + bucketLimiter, + SocialHashPartitionerSettings.CONVERGE_OBJECTIVE_THRESHOLD.get(conf), + SocialHashPartitionerSettings.IGNORE_MOVE_GAIN_THRESHOLD.get(conf), + SocialHashPartitionerSettings.SAMPLING_MOVE_GAIN_THRESHOLD.get(conf)) + ); + } + + @Override + public Class getVertexValueClass( + GiraphConfiguration conf) { + return SocialHashPartitionerVertexValueImpl.class; + } + + /** Piece for sending requests from data to query vertices */ + public static final class SendFromDataToQueryVertices extends + Piece { + private final Consumer> messagesConsumer; + private final Consumer converged; + private final float convergeMoveThreshold; + private ReducerHandle countVertices; + private ReducerHandle countEdges; + + private ReducerHandle countMoved; + private ReducerArrayHandle countMovedTo; + + public SendFromDataToQueryVertices( + Consumer> messagesConsumer, + Consumer converged, + float convergeMoveThreshold) { + this.messagesConsumer = messagesConsumer; + this.converged = converged; + this.convergeMoveThreshold = convergeMoveThreshold; + } + + @Override + public void registerReducers( + CreateReducersApi reduceApi, SHPExecutionStage executionStage) { + countVertices = reduceApi.createLocalReducer(SumReduce.LONG); + countEdges = reduceApi.createLocalReducer(SumReduce.LONG); + + countMoved = reduceApi.createLocalReducer(SumReduce.LONG); + countMovedTo = BasicArrayReduce.createLocalArrayHandles( + executionStage.getNumBuckets(), + LongTypeOps.INSTANCE, SumReduce.LONG, reduceApi); + } + + @Override + public + VertexSender + getVertexSender( + BlockWorkerSendApi workerApi, + SHPExecutionStage executionStage) { + VertexBucketMessage message = new VertexBucketMessage(); + return (vertex) -> { + if 
(!vertex.getValue().getShouldVertexBeAssignedBucket()) { + return; + } + + reduceLong(countVertices, 1); + reduceLong(countEdges, vertex.getNumEdges()); + + int bucket = vertex.getValue().getCandidateBucket(); + if (vertex.getValue().getCurrentBucket() != bucket) { + if (SocialHashPartitionerSettings.IS_DEBUG) { + LOG.info( + "Moved " + vertex.getId() + " from " + + vertex.getValue().getCurrentBucket() + " to " + bucket); + } + reduceLong(countMoved, 1); + reduceLong(countMovedTo.get(bucket), 1); + vertex.getValue().setCurrentBucket(bucket); + + message.set(vertex.getId().get(), bucket); + workerApi.sendMessageToMultipleEdges( + vertex.getValue().getUsefulEdgeIdIterator( + vertex.getEdges().iterator()), message); + } + }; + } + + @Override + public void masterCompute( + BlockMasterApi masterApi, SHPExecutionStage executionStage) { + long moved = countMoved.getReducedValue(masterApi).get(); + long total = countVertices.getReducedValue(masterApi).get(); + SHPLoggingBuilder.setCounter( + "moved vertices", moved, masterApi, executionStage); + SHPLoggingBuilder sb = new SHPLoggingBuilder(); + sb.appendLine("Moved %d out of %d vertices, total %d edges", + moved, total, countEdges.getReducedValue(masterApi).get()); + + if (SocialHashPartitionerSettings.IS_DEBUG) { + sb.appendLine("Moved to: " + + IntStream.range(0, executionStage.getNumBuckets()) + .mapToObj(i -> countMovedTo.get(i).getReducedValue( + masterApi).toString()) + .collect(Collectors.joining(", "))); + } + + if (converged != null && moved < convergeMoveThreshold * total) { + converged.apply(true); + sb.appendLine("Converged!"); + } + sb.logToCommandLine(masterApi); + } + + @Override + public + VertexReceiver + getVertexReceiver( + BlockWorkerReceiveApi workerApi, + SHPExecutionStage executionStage) { + return (vertex, messages) -> { + messagesConsumer.apply(messages); + }; + } + + @Override + protected Class getMessageClass() { + return VertexBucketMessage.class; + } + + @Override + protected boolean 
allowOneMessageToManyIdsEncoding() { + return true; + } + } + + /** Piece for sending back replies from query to data vertices */ + public static final class SendFromQueryToDataVertices extends + Piece { + private final Supplier> messagesSupplier; + private final Consumer converged; + private final int finalNumBuckets; + private final MoveBucketLimiter bucketLimiter; + private final float convergeObjectiveThreshold; + private final float ignoreMoveGainThreshold; + private final float samplingMoveGainThreshold; + + private double previousAffectableObjective = Double.POSITIVE_INFINITY; + + private ReducerHandle numQueryVertices; + private ReducerHandle sumFanout; + private ReducerHandle sumAffectableFanout; + private ReducerHandle sumObjective; + private + ReducerHandle sumAffectableObjective; + private ReducerHandle msgsSent; + private ReducerHandle msgGroupsSent; + private ReducerHandle msgsSampled; + private ReducerHandle msgsSkipped; + + public SendFromQueryToDataVertices( + Supplier> messagesSupplier, + Consumer converged, + int finalNumBuckets, MoveBucketLimiter bucketLimiter, + float convergeObjectiveThreshold, + float ignoreMoveGainThreshold, float samplingMoveGainThreshold) { + this.messagesSupplier = messagesSupplier; + this.converged = converged; + this.finalNumBuckets = finalNumBuckets; + this.bucketLimiter = bucketLimiter; + this.convergeObjectiveThreshold = convergeObjectiveThreshold; + this.ignoreMoveGainThreshold = ignoreMoveGainThreshold; + this.samplingMoveGainThreshold = samplingMoveGainThreshold; + } + + @Override + public void registerReducers( + CreateReducersApi reduceApi, SHPExecutionStage executionStage) { + numQueryVertices = reduceApi.createLocalReducer(SumReduce.LONG); + sumFanout = reduceApi.createLocalReducer(SumReduce.LONG); + sumAffectableFanout = reduceApi.createLocalReducer(SumReduce.LONG); + sumObjective = reduceApi.createLocalReducer(SumReduce.DOUBLE); + sumAffectableObjective = reduceApi.createLocalReducer(SumReduce.DOUBLE); + + 
msgsSent = reduceApi.createLocalReducer(SumReduce.LONG); + msgGroupsSent = reduceApi.createLocalReducer(SumReduce.LONG); + msgsSampled = reduceApi.createLocalReducer(SumReduce.LONG); + msgsSkipped = reduceApi.createLocalReducer(SumReduce.LONG); + } + + @Override + public + VertexSender + getVertexSender( + BlockWorkerSendApi workerApi, + SHPExecutionStage executionStage) { + WLongArrayList targets = new WLongArrayList(); + ResettableIterator targetsIter = targets.fastIteratorW(); + FloatWritable messageValue = new FloatWritable(); + + Obj2FloatFunction fanoutProbabilityF = + SocialHashPartitionerSettings.FANOUT_PROBABILITY.createObject( + workerApi.getConf()); + + GoalFormula formula = createFormula( + fanoutProbabilityF.apply(executionStage), + finalNumBuckets <= 0 ? 1 : + (finalNumBuckets / executionStage.getNumBuckets())); + + double isolatedObjective = + formula.toObjective(formula.comparisonEval(0, 1)) + + formula.toObjective(formula.comparisonEval(1, 1)); + + return (vertex) -> { + Iterable messages = messagesSupplier.get(); + + CachedNeighborData neighbors = + vertex.getValue().updateNeighborData(messages.iterator()); + + long fanout = vertex.getValue().getRemovedIsolatedNeighbors(); + double objective = + vertex.getValue().getRemovedIsolatedNeighbors() * + isolatedObjective; + long affectableFanout = 0; + double affectableObjective = 0; + + boolean isQuery = vertex.getValue().getRemovedIsolatedNeighbors() > 0; + // check if it's a query vertex + if (neighbors != null) { + isQuery = true; + int start = 0; + while (start < neighbors.size()) { + int curBucket = neighbors.getBucket(start); + + int leftBucket = bucketLimiter.startBucket(curBucket); + int rightBucket = bucketLimiter.endBucket(curBucket); + + int mid = start; + while (mid < neighbors.size() && + neighbors.getBucket(mid) == leftBucket) { + mid++; + } + + int end = mid; + while (end < neighbors.size() && + neighbors.getBucket(end) == rightBucket) { + end++; + } + + int totalInPair = end - start; + 
double evalLeft = formula.comparisonEval(mid - start, totalInPair); + double evalRight = formula.comparisonEval(end - mid, totalInPair); + double curEval = evalLeft + evalRight; + double curObjective = + formula.toObjective(evalLeft) + formula.toObjective(evalRight); + objective += curObjective; + double curFanout = (start < mid ? 1 : 0) + (mid < end ? 1 : 0); + fanout += curFanout; + + if (end - start > 1) { + affectableObjective += curObjective - ( + formula.toObjective( + formula.comparisonEval(end - start, totalInPair)) + + formula.toObjective( + formula.comparisonEval(0, end - start))); + affectableFanout += curFanout - 1; + + sendGainsIfNeeded( + start, mid, end - mid, curEval, targets, targetsIter, + messageValue, neighbors, workerApi, formula); + + sendGainsIfNeeded( + mid, end, mid - start, curEval, targets, targetsIter, + messageValue, neighbors, workerApi, formula); + } + start = end; + } + } + + if (isQuery) { + reduceLong(numQueryVertices, 1); + reduceLong(sumFanout, fanout); + reduceLong(sumAffectableFanout, affectableFanout); + reduceDouble(sumObjective, objective); + reduceDouble(sumAffectableObjective, affectableObjective); + } + }; + } + + // CHECKSTYLE: stop ParameterNumber + private void sendGainsIfNeeded( + int rangeStart, int rangeEnd, int otherSize, double oldObjective, + WLongArrayList targets, ResettableIterator targetsIter, + FloatWritable messageValue, CachedNeighborData neighbors, + BlockWorkerSendApi workerApi, + GoalFormula formula) { + // CHECKSTYLE: resume ParameterNumber + int thisSize = rangeEnd - rangeStart; + if (0 < thisSize && otherSize + 1 != thisSize) { + double newObjective = + formula.comparisonEval(otherSize + 1, thisSize + otherSize) + + formula.comparisonEval(thisSize - 1, thisSize + otherSize); + double gain = newObjective - oldObjective; + + boolean toSend = false; + if (Math.abs(gain) > + formula.scaleThreshold(samplingMoveGainThreshold)) { + toSend = true; + } else if (Math.abs(gain) > + 
formula.scaleThreshold(ignoreMoveGainThreshold)) { + reduceLong(msgsSampled, thisSize); + if (ThreadLocalRandom.current().nextFloat() < + SocialHashPartitionerSettings.SAMPLING_MOVE_GAIN_RATIO) { + toSend = true; + gain /= SocialHashPartitionerSettings.SAMPLING_MOVE_GAIN_RATIO; + } + } + + if (toSend) { + messageValue.set((float) gain); + neighbors.fillIdSubset(targets, rangeStart, rangeEnd); + targetsIter.reset(); + + reduceLong(msgGroupsSent, 1); + reduceLong(msgsSent, thisSize); + workerApi.sendMessageToMultipleEdges(targetsIter, messageValue); + } else if (gain != 0) { + reduceLong(msgsSkipped, thisSize); + } + } + } + + @Override + public void masterCompute( + BlockMasterApi masterApi, SHPExecutionStage executionStage) { + long numQueries = numQueryVertices.getReducedValue(masterApi).get(); + double fanout = + ((double) sumFanout.getReducedValue(masterApi).get()) / numQueries; + double affectableFanout = + ((double) sumAffectableFanout.getReducedValue(masterApi).get()) / + numQueries; + double objective = + sumObjective.getReducedValue(masterApi).get() / numQueries; + double affectableObjective = + sumAffectableObjective.getReducedValue(masterApi).get() / numQueries; + + SHPLoggingBuilder.setCounter( + "fanout", fanout, masterApi, executionStage); + SHPLoggingBuilder.setCounter( + "objective", objective, masterApi, executionStage); + + SHPLoggingBuilder sb = new SHPLoggingBuilder(); + sb.appendLine("At: " + executionStage); + sb.appendLine("Num query vertices: " + numQueries); + sb.appendLine( + "Avg fanout: " + fanout + ", caused by current split: " + + affectableFanout); + sb.appendLine( + "Avg objective: " + objective + ", caused by current split: " + + affectableObjective); + + long sent = msgsSent.getReducedValue(masterApi).get(); + long groupsSent = msgGroupsSent.getReducedValue(masterApi).get(); + long skipped = msgsSkipped.getReducedValue(masterApi).get(); + double total = sent + skipped; + sb.appendLine( + "Msgs sent: %d, avg per group %.2f, ratio 
msgs skipped %.5f, ratio " + + "msgs sampled %.5f", + sent, ((double) sent) / groupsSent, + skipped / total, + msgsSampled.getReducedValue(masterApi).get() / total); + + if (converged != null && convergeObjectiveThreshold > 0 && + Math.abs(affectableObjective - previousAffectableObjective) < + convergeObjectiveThreshold * + Math.max(objective, previousAffectableObjective)) { + converged.apply(true); + sb.appendLine("Converged!"); + } + sb.logToCommandLine(masterApi); + previousAffectableObjective = affectableObjective; + } + + @Override + public + VertexReceiver getVertexReceiver( + BlockWorkerReceiveApi workerApi, + SHPExecutionStage executionStage) { + return (vertex, messages) -> { + Iterator iter = messages.iterator(); + if (iter.hasNext()) { + vertex.getValue().setBisectionMoveGain(iter.next().get()); + if (SocialHashPartitionerSettings.IS_DEBUG) { + LOG.info( + vertex.getId() + " has gain " + + vertex.getValue().getBisectionMoveGain()); + } + Preconditions.checkState(!iter.hasNext()); + } else { + vertex.getValue().setBisectionMoveGain(0); + } + }; + } + + @Override + protected + MessageCombiner getMessageCombiner( + ImmutableClassesGiraphConfiguration conf) { + return SumMessageCombiner.FLOAT; + } + + @Override + protected boolean allowOneMessageToManyIdsEncoding() { + return true; + } + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPOptimizationGoal.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPOptimizationGoal.java new file mode 100644 index 000000000..e216ec6bb --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPOptimizationGoal.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.goal; + +import org.apache.giraph.block_app.framework.block.Block; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.function.Consumer; +import org.apache.hadoop.io.Writable; + +/** + * Definition of different optimization functions. + * + * Optimization function needs to define these two methods: + * + * getVertexValueClass + * createMoveBlock + */ +public interface SHPOptimizationGoal { + Class getVertexValueClass(GiraphConfiguration conf); + + Block createMoveBlock(GiraphConfiguration conf, Consumer converged); +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPOptimizationGoals.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPOptimizationGoals.java new file mode 100644 index 000000000..204eb476f --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/SHPOptimizationGoals.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.goal; + +import org.apache.giraph.block_app.framework.block.Block; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.function.Consumer; +import org.apache.hadoop.io.Writable; + +/** List of available optimization goals */ +public enum SHPOptimizationGoals implements SHPOptimizationGoal { + FANOUT(new SHPFanoutOptimizationGoal()); + + private final SHPOptimizationGoal optimizationGoal; + + private SHPOptimizationGoals(SHPOptimizationGoal optimizationGoal) { + this.optimizationGoal = optimizationGoal; + } + + @Override + public Class getVertexValueClass( + GiraphConfiguration conf) { + return optimizationGoal.getVertexValueClass(conf); + } + + @Override + public Block createMoveBlock( + GiraphConfiguration conf, Consumer converged) { + return optimizationGoal.createMoveBlock(conf, converged); + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/VertexBucketMessage.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/VertexBucketMessage.java new file mode 100644 index 000000000..2ca46b0ad --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/VertexBucketMessage.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.goal; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.giraph.utils.Varint; +import org.apache.hadoop.io.Writable; + +/** Message containing vertex id and its bucket */ +public final class VertexBucketMessage implements Writable { + private long id; + private int bucket; + + public VertexBucketMessage() { + } + + public long getId() { + return id; + } + + public int getBucket() { + return bucket; + } + + public void set(long id, int bucket) { + this.id = id; + this.bucket = bucket; + } + + @Override + public void write(DataOutput out) throws IOException { + Varint.writeSignedVarLong(id, out); + Varint.writeUnsignedVarInt(bucket, out); + } + + @Override + public void readFields(DataInput in) throws IOException { + id = Varint.readSignedVarLong(in); + bucket = Varint.readUnsignedVarInt(in); + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/cache/CachedGeneratedArray.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/cache/CachedGeneratedArray.java new file mode 100644 index 000000000..423fb2552 --- /dev/null +++ 
b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/cache/CachedGeneratedArray.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.goal.cache; + +import org.apache.giraph.function.primitive.func.Int2DoubleFunction; + +/** Int2DoubleFunction which caches [0, countToCache) values */ +public class CachedGeneratedArray implements Int2DoubleFunction { + private final int countToCache; + // TODO make this not serialized + private final double[] cached; + private final Int2DoubleFunction function; + + public CachedGeneratedArray( + int countToCache, Int2DoubleFunction function) { + super(); + this.countToCache = countToCache; + this.function = function; + this.cached = generateCache(); + } + + private double[] generateCache() { + double[] res = new double[countToCache]; + for (int i = 0; i < res.length; i++) { + res[i] = function.apply(i); + } + return res; + } + + @Override + public double apply(int n) { + return n < cached.length ? 
cached[n] : function.apply(n); + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/cache/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/cache/package-info.java new file mode 100644 index 000000000..52410d9a1 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/cache/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Caching of computed values + */ +package org.apache.giraph.block_app.library.partitioning.goal.cache; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/package-info.java new file mode 100644 index 000000000..3b692b853 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/goal/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Goal (objective) pieces and computation + */ +package org.apache.giraph.block_app.library.partitioning.goal; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/package-info.java new file mode 100644 index 000000000..38c3dbf8c --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * SocialHashPartitioner (ScalableHypergraphPartitioner) - library + * for fully distributed and scalable implementation of hypergraph + * partitioning, for optimizing for fanout objective. + */ +package org.apache.giraph.block_app.library.partitioning; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/MoveBucketLimiter.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/MoveBucketLimiter.java new file mode 100644 index 000000000..1a8723644 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/MoveBucketLimiter.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.recursive; + +/** + * Abstracts any movement restrictions (currently supports only + * consecutive chunks restrictions). + */ +public interface MoveBucketLimiter { + /** + * Returns the first bucket allowable for a vertex + * in the currentBucket. 
+ * + * @param currentBucket current bucket + * @return first allowable bucket + */ + int startBucket(int currentBucket); + + /** + * Returns the last bucket allowable + 1 for a vertex + * in the currentBucket. + * + * @param currentBucket current bucket + * @return last allowable bucket + 1 + */ + int endBucket(int currentBucket); + + int otherBucket(int currentBucket); + + default boolean inside(int currentBucket, int checkBucket) { + return startBucket(currentBucket) <= checkBucket && + checkBucket < endBucket(currentBucket); + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/MoveBucketLimiterImpl.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/MoveBucketLimiterImpl.java new file mode 100644 index 000000000..56059301b --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/MoveBucketLimiterImpl.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.block_app.library.partitioning.recursive; + +/** Implementation of MoveBucketLimiter */ +public class MoveBucketLimiterImpl implements MoveBucketLimiter { + @Override + public int startBucket(int currentBucket) { + return currentBucket & (~1); + } + + @Override + public int endBucket(int currentBucket) { + return currentBucket | 1; + } + + @Override + public int otherBucket(int currentBucket) { + return currentBucket ^ 1; + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/RecursiveSettings.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/RecursiveSettings.java new file mode 100644 index 000000000..0df299aa7 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/RecursiveSettings.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.block_app.library.partitioning.recursive; + +import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue; +import org.apache.giraph.function.primitive.func.Int2IntFunction; + +import com.google.common.base.Preconditions; + +/** + * Recursive settings + */ +public class RecursiveSettings { + public static final int SPLIT_EACH_INTO_NUM_BUCKETS = 2; + + protected int initialNumBuckets = 1; + /** The number of times each bucket is recursively split in two. */ + protected int numSplits = 0; + /** The number of splits that have already occurred in the initial bucket. */ + protected int priorNumSplits = 0; + + /** Whether to use initialization from a previous partitioning job */ + protected boolean usePreviousPartitioning = false; + + /** Number of recursive levels used in the previous partitioning */ + protected int previousPartitioningNumLevels = 0; + + /** When using recursive splitting, should we split initially */ + protected boolean runIterationsBeforeFirstSplit = false; + + public boolean isRecursiveSplitting() { + return getNumSplits() > 0 || getPriorNumSplits() > 0; + } + + public int getInitialNumBuckets() { + return initialNumBuckets; + } + + public int getFinalNumBuckets() { + return (int) (initialNumBuckets * Math.pow(2, numSplits)); + } + + public int getNumSplits() { + Preconditions.checkState(numSplits >= 0); + return numSplits; + } + + public int getPriorNumSplits() { + Preconditions.checkState(priorNumSplits >= 0); + return priorNumSplits; + } + + public boolean runIterationsBeforeFirstSplit() { + Preconditions.checkState(runIterationsBeforeFirstSplit || numSplits > 0); + return runIterationsBeforeFirstSplit; + } + + public Int2IntFunction getPreviousBucketToBucketAtLevel(int level) { + if (usePreviousPartitioning) { + Preconditions.checkState(isRecursiveSplitting()); + Preconditions.checkState(previousPartitioningNumLevels > 0); + int divisor = (int) Math.pow(SPLIT_EACH_INTO_NUM_BUCKETS, 
+ previousPartitioningNumLevels - level - getPriorNumSplits()); + return (previousBucket) -> { + Preconditions.checkState( + previousBucket != SocialHashPartitionerVertexValue.BUCKET_NOT_SET); + return previousBucket / divisor; + }; + } else { + return null; + } + } + + public static String getCodeSnippet( + int numSplits, + int splitEachIntoNumBuckets, + int priorNumSplits, + int splitMovementDepth, + boolean runIterationsBeforeFirstSplit + ) { + return "numSplits = " + numSplits + ";" + + "splitEachIntoNumBuckets = " + splitEachIntoNumBuckets + ";" + + "priorNumSplits = " + priorNumSplits + ";" + + "splitMovementDepth = " + splitMovementDepth + ";" + + "runIterationsBeforeFirstSplit = " + runIterationsBeforeFirstSplit + ";"; + } + + public RecursiveSettings createCopy() { + RecursiveSettings copy = new RecursiveSettings(); + copy.numSplits = numSplits; + copy.priorNumSplits = priorNumSplits; + copy.usePreviousPartitioning = usePreviousPartitioning; + copy.previousPartitioningNumLevels = previousPartitioningNumLevels; + copy.runIterationsBeforeFirstSplit = runIterationsBeforeFirstSplit; + return copy; + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/SHPSplitPiece.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/SHPSplitPiece.java new file mode 100644 index 000000000..5cafd4f65 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/SHPSplitPiece.java @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.recursive; + +import org.apache.giraph.block_app.framework.api.BlockMasterApi; +import org.apache.giraph.block_app.framework.api.BlockWorkerReceiveApi; +import org.apache.giraph.block_app.framework.api.BlockWorkerSendApi; +import org.apache.giraph.block_app.framework.api.CreateReducersApi; +import org.apache.giraph.block_app.framework.piece.Piece; +import org.apache.giraph.block_app.framework.piece.global_comm.array.BroadcastArrayHandle; +import org.apache.giraph.block_app.framework.piece.global_comm.array.ReducerArrayHandle; +import org.apache.giraph.block_app.framework.piece.interfaces.VertexReceiver; +import org.apache.giraph.block_app.framework.piece.interfaces.VertexSender; +import org.apache.giraph.block_app.library.partitioning.SHPExecutionStage; +import org.apache.giraph.block_app.library.partitioning.assignment.BucketAssigner; +import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue; +import org.apache.giraph.block_app.reducers.array.BasicArrayReduce; +import org.apache.giraph.function.primitive.func.Int2IntFunction; +import org.apache.giraph.reducers.impl.SumReduce; +import org.apache.giraph.types.NoMessage; +import org.apache.giraph.types.ops.LongTypeOps; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Preconditions; + + +/** + * Piece which splits each bucket into set of sub-buckets. + * + * Does so by updating candidate and current bucket appropriately. 
+ * + * @param vertex value + */ +public class SHPSplitPiece extends + Piece { + + private final RecursiveSettings recursiveSettings; + private final BucketAssigner bucketAssigner; + + private ReducerArrayHandle bucketSizeReducer; + private BroadcastArrayHandle bucketSizeBroadcast; + + public SHPSplitPiece( + RecursiveSettings recursiveSettings, BucketAssigner bucketAssigner) { + this.recursiveSettings = recursiveSettings.createCopy(); + this.bucketAssigner = bucketAssigner; + } + + @Override + public void registerReducers( + CreateReducersApi reduceApi, SHPExecutionStage executionStage) { + bucketSizeReducer = BasicArrayReduce.createArrayHandles( + executionStage.getNumBuckets(), + LongTypeOps.INSTANCE, + SumReduce.LONG, + reduceApi::createLocalReducer + ); + } + + @Override + public VertexSender getVertexSender( + BlockWorkerSendApi workerApi, + SHPExecutionStage executionStage + ) { + return (vertex) -> { + V value = vertex.getValue(); + if (!value.getShouldVertexBeAssignedBucket()) { + return; + } + + int candidateBucket = value.getCandidateBucket(); + if (candidateBucket != SocialHashPartitionerVertexValue.BUCKET_NOT_SET) { + reduceLong(bucketSizeReducer.get(candidateBucket), 1); + } + }; + } + + @Override + public void masterCompute( + BlockMasterApi masterApi, SHPExecutionStage executionStage) { + bucketSizeBroadcast = bucketSizeReducer.broadcastValue(masterApi); + } + + @Override + public + VertexReceiver getVertexReceiver( + BlockWorkerReceiveApi workerApi, + SHPExecutionStage executionStage + ) { + int splitEachIntoNumBuckets = + RecursiveSettings.SPLIT_EACH_INTO_NUM_BUCKETS; + + Int2IntFunction toBucketAtCurrentLevel = + recursiveSettings.getPreviousBucketToBucketAtLevel( + executionStage.getSplits()); + Int2IntFunction toBucketAtNextLevel = + recursiveSettings.getPreviousBucketToBucketAtLevel( + executionStage.getSplits() + 1); + + return (vertex, messages) -> { + V vertexValue = vertex.getValue(); + vertexValue.reset(); + + if 
(!vertexValue.getShouldVertexBeAssignedBucket()) { + return; + } + + if (toBucketAtNextLevel != null) { + int previousBucket = vertexValue.getPreviousLastLevelBucket(); + if (previousBucket != + SocialHashPartitionerVertexValue.BUCKET_NOT_SET) { + vertexValue.setInitialBucket( + toBucketAtNextLevel.apply(previousBucket), true); + } + } + + int candidateBucket = vertexValue.getCandidateBucket(); + if (candidateBucket != SocialHashPartitionerVertexValue.BUCKET_NOT_SET) { + int beginBucket = candidateBucket * splitEachIntoNumBuckets; + int endBucket = + candidateBucket * splitEachIntoNumBuckets + + splitEachIntoNumBuckets; + long numVertices = bucketSizeBroadcast.get(candidateBucket) + .getBroadcast(workerApi).get(); + int newCandidate = bucketAssigner.getAssignedBucket( + vertexValue, + beginBucket, + endBucket, + numVertices + ); + if (toBucketAtCurrentLevel != null) { + int previousBucket = vertexValue.getPreviousLastLevelBucket(); + if (previousBucket != + SocialHashPartitionerVertexValue.BUCKET_NOT_SET) { + int predictedBucket = toBucketAtCurrentLevel.apply(previousBucket); + int nextBucket = toBucketAtNextLevel.apply(previousBucket); + if (candidateBucket == predictedBucket) { + newCandidate = nextBucket; + } + } + } + Preconditions.checkState( + candidateBucket * splitEachIntoNumBuckets <= newCandidate && + newCandidate < (candidateBucket + 1) * splitEachIntoNumBuckets); + vertexValue.setCandidateBucket(newCandidate); + } + }; + } + + @Override + public SHPExecutionStage nextExecutionStage( + SHPExecutionStage executionStage) { + return executionStage.changedSplits(executionStage.getSplits() + 1). + changedNumBuckets( + executionStage.getNumBuckets() * + RecursiveSettings.SPLIT_EACH_INTO_NUM_BUCKETS). 
+ changedLastSplitIteration(executionStage.getIteration()); + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/package-info.java new file mode 100644 index 000000000..d0a00a07e --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/recursive/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Utilities for handling recursive partitioning + */ +package org.apache.giraph.block_app.library.partitioning.recursive; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/CachedNeighborData.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/CachedNeighborData.java new file mode 100644 index 000000000..1d800b3bd --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/CachedNeighborData.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.vertex; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.giraph.block_app.library.partitioning.goal.VertexBucketMessage; +import org.apache.giraph.function.primitive.comparators.IntComparatorFunction; +import org.apache.giraph.types.ops.collections.array.WIntArrayList; +import org.apache.giraph.types.ops.collections.array.WLongArrayList; +import org.apache.hadoop.io.Writable; + +import com.google.common.base.Preconditions; + +import it.unimi.dsi.fastutil.Arrays; +import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap; + +/** Vertex state responsible for tracking data about neighbors */ +public class CachedNeighborData implements Writable { + private WLongArrayList ids = null; + private WIntArrayList buckets = null; + + public CachedNeighborData() { + } + + public CachedNeighborData(Iterator updates) { + ids = new WLongArrayList(); + buckets = new WIntArrayList(); + + while (updates.hasNext()) { + VertexBucketMessage next = updates.next(); + ids.add(next.getId()); + buckets.add(next.getBucket()); + } + + ids.trim(); + buckets.trim(); + } + + public void update(Iterator updates) { + Long2IntOpenHashMap updateMap = new 
Long2IntOpenHashMap(); + updateMap.defaultReturnValue(-2); + while (updates.hasNext()) { + VertexBucketMessage next = updates.next(); + updateMap.put(next.getId(), next.getBucket()); + } + + for (int i = 0; i < ids.size(); i++) { + int bucket = updateMap.remove(ids.getLong(i)); + if (bucket != -2) { + buckets.set(i, bucket); + } + } + + Preconditions.checkState(updateMap.isEmpty()); + } + + public void sortByBucket() { + Arrays.quickSort( + 0, + buckets.size(), + (IntComparatorFunction) (i, j) -> + Integer.compare(buckets.getInt(i), buckets.getInt(j)), + (i, j) -> { + ids.swap(i, j); + buckets.swap(i, j); + }); + } + + public int size() { + return buckets.size(); + } + + public int getBucket(int index) { + return buckets.getInt(index); + } + + public long getId(int index) { + return ids.getLong(index); + } + + public void fillIdSubset(WLongArrayList targets, int start, int end) { + targets.clear(); + targets.addElements(0, ids.elements(), start, end - start); + } + + @Override + public void readFields(DataInput in) throws IOException { + ids = WLongArrayList.readNew(in); + buckets = WIntArrayList.readNew(in); + } + + @Override + public void write(DataOutput out) throws IOException { + ids.write(out); + buckets.write(out); + } + +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/SocialHashPartitionerVertexValue.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/SocialHashPartitionerVertexValue.java new file mode 100644 index 000000000..6a2fb4994 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/SocialHashPartitionerVertexValue.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.giraph.block_app.library.partitioning.vertex; + +import java.util.Iterator; + +import org.apache.giraph.block_app.library.partitioning.goal.VertexBucketMessage; +import org.apache.giraph.edge.Edge; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; + +/** Social Hash Partitioner Vertex Value interface */ +public interface SocialHashPartitionerVertexValue extends Writable { + int BUCKET_NOT_SET = -1; + + int getCandidateBucket(); + void setCandidateBucket(int candidateBucket); + + int getCurrentBucket(); + void setCurrentBucket(int currentBucket); + + int getInitialBucket(); + void setInitialBucket(int initialBucket, boolean hadAssignment); + + int getPreviousLastLevelBucket(); + + void setBisectionMoveGain(float gain); + float getBisectionMoveGain(); + + void reset(); + + boolean getShouldVertexBeAssignedBucket(); + void setShouldVertexBeAssignedBucket(boolean shouldVertexBeAssignedBucket); + + CachedNeighborData updateNeighborData(Iterator updates); + + void initUselessEdgesSet(int numEdges); + void markEdgeAsUseless(int index); + + Iterator getUsefulEdgeIdIterator( + Iterator> iterator); + + void incrementRemovedIsolatedNeighbors(int value); + int getRemovedIsolatedNeighbors(); +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/SocialHashPartitionerVertexValueImpl.java 
b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/SocialHashPartitionerVertexValueImpl.java new file mode 100644 index 000000000..c9e76ad48 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/SocialHashPartitionerVertexValueImpl.java @@ -0,0 +1,197 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.giraph.block_app.library.partitioning.vertex; + +import java.util.BitSet; +import java.util.Iterator; + +import org.apache.giraph.block_app.library.partitioning.goal.VertexBucketMessage; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.writable.kryo.KryoWritable; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; + +import com.google.common.base.Preconditions; +import com.google.common.collect.AbstractIterator; + +/** Social Hash Partitioner Vertex Value Implementation */ +public class SocialHashPartitionerVertexValueImpl extends KryoWritable + implements SocialHashPartitionerVertexValue { + private boolean hasInitialAssignment; + private int initialBucket; + + private int previousLastLevelBucket; + + private int candidateBucket; + private int currentBucket; + + private float bisectionMoveGain; + + private boolean shouldVertexBeAssignedBucket; + + private CachedNeighborData neighborData; + private int removedIsolatedNeighbors; + + private BitSet uselessEdges; + + public SocialHashPartitionerVertexValueImpl() { + initialize(); + } + + public void initialize() { + this.currentBucket = BUCKET_NOT_SET; + this.candidateBucket = BUCKET_NOT_SET; + this.initialBucket = BUCKET_NOT_SET; + this.hasInitialAssignment = false; + this.shouldVertexBeAssignedBucket = false; + this.neighborData = null; + this.uselessEdges = null; + } + + @Override + public void reset() { + currentBucket = BUCKET_NOT_SET; + bisectionMoveGain = 0; + neighborData = null; + } + + @Override + public int getCandidateBucket() { + return candidateBucket; + } + + @Override + public void setCandidateBucket(int candidateBucket) { + if (candidateBucket < 0 && candidateBucket != BUCKET_NOT_SET) { + throw new IllegalArgumentException( + "Candidate bucket cannot be " + candidateBucket); + } + this.candidateBucket = candidateBucket; + } + + @Override + public int getCurrentBucket() { + return currentBucket; + } + + @Override + public void 
setCurrentBucket(int currentBucket) { + if (currentBucket < 0 && currentBucket != BUCKET_NOT_SET) { + throw new IllegalArgumentException( + "Current bucket cannot be " + currentBucket); + } + this.currentBucket = currentBucket; + } + + @Override + public int getInitialBucket() { + return initialBucket; + } + + @Override + public void setInitialBucket( + int initialBucket, boolean hadAssignment) { + Preconditions.checkState(initialBucket != BUCKET_NOT_SET); + this.initialBucket = initialBucket; + this.hasInitialAssignment = hadAssignment; + } + + @Override + public int getPreviousLastLevelBucket() { + return previousLastLevelBucket; + } + + @Override + public void setBisectionMoveGain(float bisectionMoveGain) { + this.bisectionMoveGain = bisectionMoveGain; + } + + @Override + public float getBisectionMoveGain() { + return bisectionMoveGain; + } + + @Override + public void setShouldVertexBeAssignedBucket( + boolean shouldVertexBeAssignedBucket) { + this.shouldVertexBeAssignedBucket = shouldVertexBeAssignedBucket; + } + + @Override + public boolean getShouldVertexBeAssignedBucket() { + return shouldVertexBeAssignedBucket; + } + + @Override + public CachedNeighborData updateNeighborData( + Iterator updates) { + if (updates.hasNext()) { + if (neighborData == null) { + neighborData = new CachedNeighborData(updates); + } else { + neighborData.update(updates); + } + + neighborData.sortByBucket(); + } + return neighborData; + } + + @Override + public void initUselessEdgesSet(int numEdges) { + uselessEdges = new BitSet(numEdges); + } + + @Override + public void markEdgeAsUseless(int index) { + uselessEdges.set(index, true); + } + + @Override + public + Iterator getUsefulEdgeIdIterator( + Iterator> iterator) { + return new AbstractIterator() { + private int index = 0; + @Override + protected I computeNext() { + while (iterator.hasNext() && uselessEdges.get(index)) { + index++; + iterator.next(); + } + if (iterator.hasNext()) { + 
Preconditions.checkState(!uselessEdges.get(index)); + index++; + return iterator.next().getTargetVertexId(); + } else { + return endOfData(); + } + } + }; + } + + @Override + public void incrementRemovedIsolatedNeighbors(int value) { + removedIsolatedNeighbors += value; + } + + @Override + public int getRemovedIsolatedNeighbors() { + return removedIsolatedNeighbors; + } +} diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/package-info.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/package-info.java new file mode 100644 index 000000000..569bebad5 --- /dev/null +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/partitioning/vertex/package-info.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Classes for vertex state. 
+ */ +package org.apache.giraph.block_app.library.partitioning.vertex; diff --git a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/stats/PartitioningStats.java b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/stats/PartitioningStats.java index 950c14442..1726076f2 100644 --- a/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/stats/PartitioningStats.java +++ b/giraph-block-app-8/src/main/java/org/apache/giraph/block_app/library/stats/PartitioningStats.java @@ -17,27 +17,38 @@ */ package org.apache.giraph.block_app.library.stats; -import it.unimi.dsi.fastutil.longs.LongOpenHashSet; -import it.unimi.dsi.fastutil.longs.LongSet; +import java.util.stream.LongStream; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.giraph.block_app.framework.block.Block; +import org.apache.giraph.block_app.library.Pieces; import org.apache.giraph.block_app.library.SendMessageChain; +import org.apache.giraph.block_app.reducers.array.BasicArrayReduce; import org.apache.giraph.function.primitive.DoubleConsumer; +import org.apache.giraph.function.primitive.PrimitiveRefs.IntRef; import org.apache.giraph.function.vertex.SupplierFromVertex; import org.apache.giraph.reducers.impl.PairReduce; import org.apache.giraph.reducers.impl.SumReduce; +import org.apache.giraph.types.ops.LongTypeOps; +import org.apache.giraph.types.ops.collections.array.WArrayList; +import org.apache.giraph.types.ops.collections.array.WLongArrayList; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.log4j.Logger; +import com.google.common.base.Preconditions; + +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongSet; + /** * Utility blocks for calculating stats for a given partitioning - an * assignment of vertices to buckets. 
*/ public class PartitioningStats { - private static final Logger LOG = Logger.getLogger(PartitioningStats.class); + private static final Logger LOG = Logger.getLogger(PartitioningStats.class); private PartitioningStats() { } @@ -76,11 +87,13 @@ public static Block calculateEdgeLocality( long localEdges = reducedPair.getLeft().get(); long totalEdges = reducedPair.getRight().get(); double edgeLocality = (double) localEdges / totalEdges; - LOG.info("locality ratio = " + edgeLocality); + master.logToCommandLine("locality ratio = " + edgeLocality); master.getCounter( - "Edge locality stats", "edge locality (in percent * 1000)") + "Partitioning stats", "edge locality (in percent * 1000)") .setValue((long) (edgeLocality * 100000)); - edgeLocalityConsumer.apply(edgeLocality); + if (edgeLocalityConsumer != null) { + edgeLocalityConsumer.apply(edgeLocality); + } } ); } @@ -93,6 +106,8 @@ public static Block calculateFanout( SupplierFromVertex bucketSupplier, DoubleConsumer averageFanoutConsumer) { + final Pair zero = + Pair.of(new LongWritable(0), new LongWritable(0)); final Pair pair = Pair.of(new LongWritable(), new LongWritable(1)); return SendMessageChain. 
@@ -108,18 +123,64 @@ public static Block calculateFanout( for (LongWritable neighborBucket : messages) { setOfNeighborBuckets.add(neighborBucket.get()); } - pair.getLeft().set(setOfNeighborBuckets.size()); - return pair; + if (setOfNeighborBuckets.isEmpty()) { + return zero; + } else { + pair.getLeft().set(setOfNeighborBuckets.size()); + return pair; + } }, (reducedPair, master) -> { long fanout = reducedPair.getLeft().get(); long numVertices = reducedPair.getRight().get(); double avgFanout = (double) fanout / numVertices; - LOG.info("fanout ratio = " + avgFanout); - master.getCounter("Fanout stats", "fanout (in percent * 1000)") - .setValue((long) (avgFanout * 100000)); - averageFanoutConsumer.apply(avgFanout); + master.logToCommandLine("fanout ratio = " + avgFanout); + master.getCounter("Partitioning stats", "fanout (* 1000)") + .setValue((long) (avgFanout * 1000)); + if (averageFanoutConsumer != null) { + averageFanoutConsumer.apply(avgFanout); + } } ); } + + public static Block calculateImbalance( + int numBuckets, + SupplierFromVertex + bucketSupplier, + DoubleConsumer imbalanceConsumer) { + return Pieces., WArrayList, + WritableComparable, V, Writable>reduceWithMaster( + "CalcBalancePiece", + new BasicArrayReduce<>( + numBuckets, LongTypeOps.INSTANCE, SumReduce.LONG), + (vertex) -> { + LongWritable bucket = bucketSupplier.get(vertex); + if (bucket != null) { + Preconditions.checkState( + bucket.get() >= 0 && bucket.get() < numBuckets); + return ImmutablePair.of( + new IntRef((int) bucket.get()), new LongWritable(1)); + } + return null; + }, + (value, master) -> { + long sum = LongStream.of( + ((WLongArrayList) value).toLongArray()).sum(); + long max = LongStream.of( + ((WLongArrayList) value).toLongArray()).max().getAsLong(); + double imbalance = max / (((double) sum) / numBuckets); + + master.logToCommandLine( + "imbalance ratio = " + imbalance + " = " + max + " / " + sum + + " / " + numBuckets); + master.getCounter("Partitioning stats", "imbalance (* 
1000)") + .setValue((long) (imbalance * 1000)); + + if (imbalanceConsumer != null) { + imbalanceConsumer.apply(imbalance); + } + }); + + } } diff --git a/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/framework/no_vtx/MessagesWithoutVerticesTest.java b/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/framework/no_vtx/MessagesWithoutVerticesTest.java index bf3e1946a..a1c93594f 100644 --- a/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/framework/no_vtx/MessagesWithoutVerticesTest.java +++ b/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/framework/no_vtx/MessagesWithoutVerticesTest.java @@ -16,7 +16,7 @@ import org.apache.giraph.block_app.test_setup.graphs.Small1GraphInit; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.function.ObjectTransfer; -import org.apache.giraph.function.primitive.Int2IntFunction; +import org.apache.giraph.function.primitive.func.Int2IntFunction; import org.apache.giraph.graph.Vertex; import org.apache.giraph.types.ops.collections.array.WIntArrayList; import org.apache.hadoop.io.IntWritable; diff --git a/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerTest.java b/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerTest.java new file mode 100644 index 000000000..bd76e6b06 --- /dev/null +++ b/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/library/partitioning/SocialHashPartitionerTest.java @@ -0,0 +1,48 @@ +package org.apache.giraph.block_app.library.partitioning; + +import org.apache.giraph.block_app.framework.BlockUtils; +import org.apache.giraph.block_app.library.partitioning.vertex.SocialHashPartitionerVertexValue; +import org.apache.giraph.block_app.test_setup.TestGraphUtils; +import org.apache.giraph.block_app.test_setup.graphs.SyntheticGraphInit; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.graph.Vertex; 
+import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.junit.Test; + +public class SocialHashPartitionerTest { + private void testSumOverSameGroup(boolean fullGiraphEnv) throws Exception { + TestGraphUtils.runTest( +// new SmallDirectedTreeGraphInit(), + new SyntheticGraphInit(), + (graph) -> { + for (Vertex vtx : graph.getTestGraph()) { + } + }, + (GiraphConfiguration conf) -> { + BlockUtils.setBlockFactoryClass(conf, SocialHashPartitionerBlockFactory.class); + TestGraphUtils.USE_FULL_GIRAPH_ENV_IN_TESTS.set(conf, fullGiraphEnv); + + SocialHashPartitionerSettings.RECURSIVE.setCodeSnippet(conf, "numSplits=6;"); + SocialHashPartitionerSettings.NUM_ITERATIONS_PER_SPLIT.set(conf, 40); + SocialHashPartitionerSettings.FANOUT_PROBABILITY.setCodeSnippet( + conf, "0.5f"); + SocialHashPartitionerSettings.ALLOWED_IMBALANCE.set(conf, 0.05f); + + SyntheticGraphInit.NUM_VERTICES.set(conf, 50000); + SyntheticGraphInit.NUM_EDGES_PER_VERTEX.set(conf, 10); + SyntheticGraphInit.NUM_COMMUNITIES.set(conf, 100); + SyntheticGraphInit.ACTUAL_LOCALITY_RATIO.set(conf, 0.8f); + }); + } + + @Test + public void testWithLocalBlockRunner() throws Exception { + testSumOverSameGroup(false); + } + + @Test + public void testWithGiraphEnv() throws Exception { + testSumOverSameGroup(true); + } +} diff --git a/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDescTest.java b/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDescTest.java new file mode 100644 index 000000000..9c9b95764 --- /dev/null +++ b/giraph-block-app-8/src/test/java/org/apache/giraph/block_app/library/partitioning/decide/HistogramDescTest.java @@ -0,0 +1,263 @@ +package org.apache.giraph.block_app.library.partitioning.decide; + +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; + +import org.apache.giraph.function.primitive.func.Byte2LongFunction; +import org.junit.Test; 
+ +import it.unimi.dsi.fastutil.bytes.Byte2LongOpenHashMap; + +public class HistogramDescTest { + private static final float EPS = 1e-5f; + + @Test + public void testBins() { + HistogramDesc hist = HistogramDescImpl.create(2, 2); + + assertEquals(9, hist.largestIndex()); + + assertEquals(Float.NEGATIVE_INFINITY, hist.smallestValue((byte) 0), EPS); + + assertEquals(- 4 * HistogramDescImpl.NEG_SYM_OFFSET, hist.smallestValue((byte) 1), EPS); + assertEquals(- 2 * HistogramDescImpl.NEG_SYM_OFFSET, hist.smallestValue((byte) 2), EPS); + assertEquals(- 1 * HistogramDescImpl.NEG_SYM_OFFSET, hist.smallestValue((byte) 3), EPS); + assertEquals(- 0.5 * HistogramDescImpl.NEG_SYM_OFFSET, hist.smallestValue((byte) 4), EPS); + + assertEquals(0, hist.smallestValue((byte) 5), EPS); + assertEquals(0.5, hist.smallestValue((byte) 6), EPS); + assertEquals(1, hist.smallestValue((byte) 7), EPS); + assertEquals(2, hist.smallestValue((byte) 8), EPS); + assertEquals(4, hist.smallestValue((byte) 9), EPS); + + assertEquals(8, hist.toBin(2)); + assertEquals(8, hist.toBin(2 + EPS)); + assertEquals(7, hist.toBin(2 - EPS)); + } + + private static void concreteExample( + long[] bucketSizes, + float allowedSize, float moveProbability, float maxMoveRatio, + long[] leftMoveCandidates, + long[] rightMoveCandidates, + byte[] expectedBins, float[] expectedProbs + ) { + HistogramDesc hist = HistogramDescImpl.create(30, (float) Math.pow(2, 0.5)); + byte[] bins = new byte[2]; + float[] probs = new float[2]; + hist.computeWhatToSwap( + new int[] {0, 1}, + bucketSizes, + allowedSize, moveProbability, maxMoveRatio, + new Byte2LongFunction[] { + bin -> leftMoveCandidates[bin], + bin -> rightMoveCandidates[bin] + }, bins, probs); + assertEquals(expectedBins[0], bins[0]); + assertEquals(expectedProbs[0], probs[0], EPS); + assertEquals(expectedBins[1], bins[1]); + assertEquals(expectedProbs[1], probs[1], EPS); + } + + @Test + public void testConcreteExample1() { + concreteExample( + new long[] {3077496, 3076168}, + 
3093583.5f, 0.8f, 0.1f, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 1382, 21619, 72606, 112344, 121484, 183594, 156313, 121986, 84027, 92026, 58414, 77552, 46004, 65163, 39110, 50724, 33211, 42052, 28679, 34457, 25232, 8673, 8078, 9053, 8836, 9411, 9788, 9350, 10062, 10527, 10663, 10367, 10738, 108919, 1093194, 18028, 3374, 16775, 3767, 16590, 3890, 15177, 3637, 13471, 3555, 12864, 3531, 30526, 8418, 22612, 7218, 13443, 5830, 11665, 6672, 11480, 6879, 11394, 7389, 11736, 7258, 6853, 3420, 1915, 812, 302, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 1410, 22257, 74387, 112939, 122726, 183878, 158154, 121899, 84570, 91542, 57802, 77768, 45971, 64265, 38725, 50560, 32780, 42149, 28476, 33549, 25183, 8307, 7850, 8553, 8676, 8883, 9238, 10051, 9837, 9942, 10365, 9629, 10713, 110208, 1092378, 17593, 3370, 17110, 3544, 16782, 3975, 15307, 3750, 14539, 3898, 12932, 3675, 30541, 8395, 22540, 7299, 13353, 5814, 11997, 6504, 11290, 6952, 11341, 7331, 11470, 7142, 6750, 3603, 1847, 845, 335, 38, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new byte[] { 61, 62 }, + new float[] { 0.000614f, 1.0f}); + } + + @Test + public void testConcreteExample2() { + concreteExample( + new long[] {1515643, 1555203}, + 1555167.8f, 0.8f, 0.1f, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 575, 14057, 86413, 170193, 173141, 157592, 143723, 221652, 107161, 127341, 62052, 93122, 41273, 62860, 24811, 29574, 12393, 8087, 4726, 4103, 2021, 2330, 1078, 333, 183, 224, 146, 192, 133, 176, 118, 160, 101, 111, 71, 629, 895, 45, 15, 72, 22, 62, 28, 77, 29, 75, 54, 92, 34, 166, 64, 123, 48, 61, 32, 41, 41, 48, 39, 55, 34, 46, 20, 16, 5, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0 + }, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 531, 9774, 41275, 71131, 71482, 76691, 70767, 48646, 49813, 32889, 36165, 24834, 28477, 20450, 24349, 17358, 22546, 16228, 22384, 15079, 23010, 5619, 7641, 4043, 6301, 3776, 5801, 3589, 5500, 3345, 4942, 3060, 4270, 26941, 195386, 10638, 5677, 11131, 6749, 11584, 7793, 11997, 8648, 12225, 9320, 12922, 10979, 40207, 34573, 41776, 35098, 37121, 28174, 24674, 21574, 17339, 17008, 16085, 15907, 16447, 15247, 13487, 9245, 5256, 2097, 498, 69, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new byte[] { 38, 84 }, + new float[] { 0.79067767f, 1.0f}); + } + + @Test + public void testConcreteExample3() { + concreteExample( + new long[] {773227, 781808}, + 781771.75f, 0.8f, 0.1f, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6067, 61301, 98082, 173628, 222388, 163516, 48898, 6538, 987, 281, 79, 22, 5, 1, 1, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 2, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 413, 5474, 19873, 30222, 30322, 26050, 19962, 15016, 10950, 7885, 5663, 3973, 2790, 2067, 1429, 970, 744, 529, 376, 232, 184, 120, 96, 76, 53, 33, 32, 55, 53, 24, 34, 35, 62, 93, 130, 193, 236, 338, 478, 720, 1032, 1519, 2071, 2952, 4157, 5977, 8541, 12557, 17651, 25957, 38027, 60704, 112164, 134663, 94372, 48894, 13011, 853, 118, 17, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new byte[] { 33, 89 }, + new float[] { 0.34266233f, 1.0f}); + } + + @Test + public void testConcreteExample4() { + concreteExample( + new long[] {300, 298}, + 299.55576f, 0.8f, 0.1f, + new long[] 
{ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 4, 6, 5, 7, 9, 9, 20, 15, 16, 13, 7, 12, 7, 10, 4, 2, 0, 3, 0, 1, 1, 2, 2, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 2, 0, 2, 0, 0, 1, 0, 5, 3, 3, 9, 9, 8, 13, 7, 11, 17, 13, 8, 8, 8, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 6, 7, 7, 13, 11, 13, 15, 5, 11, 12, 10, 9, 7, 2, 2, 2, 3, 0, 3, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 4, 2, 2, 0, 6, 2, 7, 5, 15, 5, 15, 14, 10, 20, 17, 11, 6, 2, 2, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new byte[] { 90, 90 }, + new float[] { 0.9687481f, 0.23529f}); + } + + @Test + public void testConcreteExample5() { + concreteExample( + new long[] {633331, 608902}, + 633213.94f, 0.8f, 0.1f, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 47, 258, 629, 1047, 2249, 3803, 6124, 9635, 14085, 19718, 25600, 30271, 34577, 36340, 35570, 33428, 27054, 20720, 16493, 12041, 9715, 7181, 5821, 4736, 3711, 3203, 2511, 2005, 1660, 1591, 1234, 1108, 879, 281, 227, 182, 164, 200, 150, 199, 83, 119, 74, 1165, 13084, 263, 43, 340, 55, 388, 81, 464, 102, 615, 175, 2921, 763, 3484, 1088, 4363, 1960, 5596, 3652, 7253, 5960, 10366, 9798, 14698, 16360, 22849, 27448, 34631, 23144, 10829, 4669, 2014, 913, 377, 165, 56, 30, 6, 7, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 5, 5, 20, 33, 81, 163, 297, 564, 1178, 2538, 5564, 12209, 22936, 37225, 52651, 67670, 78517, 83639, 85410, 76760, 56153, 38316, 4776, 1983, 954, 509, 270, 177, 114, 86, 59, 41, 21, 12, 15, 11, 7, 7, 3, 2, 2, 0, 1, 0, 0, 0, 0, 1, 0, 6, 22, 1, 0, 0, 1, 2, 1, 2, 2, 4, 1, 6, 3, 4, 6, 8, 9, 25, 28, 39, 45, 69, 97, 148, 147, 181, 182, 226, 262, 217, 175, 159, 
109, 61, 49, 23, 10, 9, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new byte[] { 90, 32 }, + new float[] { 1.0f, 0.99327344f}); + } + + @Test + public void testConcreteExample6() { + concreteExample( + new long[] {3092325, 3061339}, + 3092216.2f, 0.8f, 0.1f, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 870, 33355, 228470, 433618, 438235, 370844, 335231, 320689, 368419, 232206, 142955, 77220, 36397, 16593, 8984, 5244, 3378, 1979, 1256, 937, 647, 372, 326, 235, 110, 68, 74, 67, 60, 50, 55, 40, 29, 268, 217, 67, 14, 12, 8, 10, 21, 26, 20, 42, 56, 88, 83, 103, 79, 105, 105, 132, 110, 122, 176, 144, 95, 129, 45, 28, 14, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new long[] { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 27, 2165, 27339, 94172, 147656, 153030, 138046, 122273, 108914, 88541, 83890, 69922, 60885, 52460, 49996, 41073, 36114, 31953, 33362, 26191, 22143, 18651, 10753, 7796, 6526, 5341, 4478, 3732, 3219, 3184, 2819, 26417, 116437, 60511, 7417, 6621, 6954, 7568, 10491, 9994, 11677, 14454, 34254, 41373, 49789, 70529, 79202, 83515, 95138, 114756, 114194, 119667, 120509, 122153, 98144, 78533, 60643, 39664, 20624, 9818, 3648, 879, 93, 7, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }, + new byte[] { 38, 84 }, + new float[] { 1.0f, 0.7596791f}); + } + + void testComputeWhatToSwap( + long[] bucketSize, float allowedSize, float moveProbability, float maxMoveRatio, + Byte2LongFunction[] getMoveCandidates, + int expectedLeftBin, float expectedLeftProb, + int expectedRightBin, float expectedRightProb) { + HistogramDesc hist = HistogramDescImpl.create(2, 2); + byte[] bins = new byte[2]; + float[] probs = new float[2]; + hist.computeWhatToSwap( + new int[] {0, 1}, bucketSize, allowedSize, moveProbability, maxMoveRatio, getMoveCandidates, 
bins, probs); + + System.out.println(Arrays.toString(bins) + " " + Arrays.toString(probs)); + assertEquals(expectedLeftBin, bins[0]); + assertEquals(expectedLeftProb, probs[0], EPS); + assertEquals(expectedRightBin, bins[1]); + assertEquals(expectedRightProb, probs[1], EPS); + } + + Byte2LongFunction toF(int... values) { + Byte2LongOpenHashMap map = new Byte2LongOpenHashMap(); + for (int i = 0; i < values.length; i+= 2) { + map.put((byte) values[i], values[i + 1]); + } + return map::get; + } + + @Test + public void testMovement() { + testComputeWhatToSwap( + new long[] {10, 10}, 10, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(9, 2), + toF(5, 1), + }, + 9, 0.5f, 5, 1f); + testComputeWhatToSwap( + new long[] {10, 10}, 10, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(9, 3), + toF(5, 1), + }, + 9, 1.0f/3, 5, 1f); + + testComputeWhatToSwap( + new long[] {10, 0}, 5, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(), + toF(0, 10), + }, + 10, 1f, 0, 0.5f); + + testComputeWhatToSwap( + new long[] {15, 5}, 10, 1.0f, 0.5f, new Byte2LongFunction[] { + toF(), + toF(0, 10), + }, + 10, 1f, 0, 0.5f); + testComputeWhatToSwap( + new long[] {20, 10}, 15, 1.0f, 0.5f, new Byte2LongFunction[] { + toF(), + toF(0, 10), + }, + 10, 1f, 0, 0.5f); + + testComputeWhatToSwap( + new long[] {10, 10}, 10, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + }, + 6, 1f, 6, 1f); + + testComputeWhatToSwap( + new long[] {16, 10}, 13, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + }, + 6, 1f, 4, 0.5f); + } + + @Test + public void testMovementWithAllowedImbalance() { + testComputeWhatToSwap( + new long[] {10, 10}, 11, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(9, 2), + toF(5, 1), + }, + 9, 1f, 5, 1f); + testComputeWhatToSwap( + new long[] {10, 10}, 12, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(9, 2), + toF(5, 1), + }, + 9, 1f, 10, 1f); + + testComputeWhatToSwap( + new long[] {16, 
10}, 16, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + }, + 6, 1f, 6, 1f); + testComputeWhatToSwap( + new long[] {16, 10}, 17, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + }, + 6, 1f, 6, 1f); + testComputeWhatToSwap( + new long[] {16, 10}, 14, 1.0f, 1.0f, new Byte2LongFunction[] { + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + toF(3, 2, 4, 2, 5, 2, 6, 2, 7, 2), + }, + 6, 1f, 5, 1f); + } +} diff --git a/giraph-block-app/src/main/java/org/apache/giraph/block_app/framework/api/local/LocalBlockRunner.java b/giraph-block-app/src/main/java/org/apache/giraph/block_app/framework/api/local/LocalBlockRunner.java index b07098a97..c7a5e5a5a 100644 --- a/giraph-block-app/src/main/java/org/apache/giraph/block_app/framework/api/local/LocalBlockRunner.java +++ b/giraph-block-app/src/main/java/org/apache/giraph/block_app/framework/api/local/LocalBlockRunner.java @@ -52,6 +52,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Iterables; +import com.google.common.util.concurrent.ThreadFactoryBuilder; /** * Local in-memory Block application job runner. 
@@ -176,7 +177,8 @@ public void progress() { } })); - ExecutorService executor = Executors.newFixedThreadPool(numThreads); + ExecutorService executor = Executors.newFixedThreadPool( + numThreads, new ThreadFactoryBuilder().setDaemon(true).build()); if (runAllChecks) { for (Vertex vertex : graph) { diff --git a/giraph-block-app/src/main/java/org/apache/giraph/block_app/library/Pieces.java b/giraph-block-app/src/main/java/org/apache/giraph/block_app/library/Pieces.java index 614f4ba03..235940e19 100644 --- a/giraph-block-app/src/main/java/org/apache/giraph/block_app/library/Pieces.java +++ b/giraph-block-app/src/main/java/org/apache/giraph/block_app/library/Pieces.java @@ -236,7 +236,10 @@ public VertexSender getVertexSender( return new InnerVertexSender() { @Override public void vertexSend(Vertex vertex) { - handle.reduce(valueSupplier.get(vertex)); + S value = valueSupplier.get(vertex); + if (value != null) { + handle.reduce(value); + } } }; }