【发布时间】:2015-03-01 01:28:22
【问题描述】:
我正在实现一个模块,其中我的主进程生成一组并行和顺序的子进程(任务)来完成其工作。任务本身主要是从各种来源获取数据并执行计算。有些受 CPU 限制,而另一些受 IO 限制。
当前的实现在多个步骤中使用 Java Executor/Completion 服务来实现这一点。这个过程工作流的一个例子可以描述如下:
          任务 A1 ---------------->
任务 0 -> 任务 A2 ---------------->
          任务 A3 -> 任务 B1 ----->  任务 C(组合所有任务的结果以生成输出)
                     任务 B2 ----->
          任务 A4 ---------------->
任务A1-A4 并行运行,任务B1 和B2 也是如此。最后,任务C 依赖于所有任务A 和B 来编译最终输出。
使用 Executor 服务来构建它似乎不够简洁。我一直在寻找更好的方法,因为这些任务依赖关系可能会随着时间的推移而改变或变得更复杂,而用 Future 和 Callable 来管理它们只会越来越难看。
我一直在探索这个主题,并遇到了响应式扩展(Reactive Extensions)和 Actor 模型框架。Akka 在这方面似乎有点大材小用,而从高层次来看,RxJava 似乎是一个合理的选择:由于其基于流/事件的处理模式,它可以简化设计并使设计更具可扩展性。
RxJava Threading Examples 中的一些示例看起来也很有前景。
我来这里是为了向社区寻求一些建议,看看这是否是正确的方法,以及是否有其他方法/更好的框架来解决这些问题。
================================================ ==================================================== ====
使用 JGraphT 编写了以下内容,但仍需要弄清楚如何重用线程池。在这种情况下,我最终会为每个请求创建新的线程执行器。在此处发布代码的主要部分,以了解该方法。
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.jgrapht.DirectedGraph;
import org.jgrapht.graph.DefaultEdge;
import org.jgrapht.traverse.TopologicalOrderIterator;
/**
 * Runs a set of {@link GraphTask}s on a thread pool in the order imposed by a directed graph.
 *
 * <p>The graph is produced by {@code GraphTaskBuilder}. A task is submitted to the pool as soon
 * as its vertex has no incoming edges. When a task finishes, its data is handed to the targets
 * of its outgoing edges and its vertex is removed, which may make further tasks runnable.
 *
 * <p>An instance is effectively single-use: the pool is shut down once the graph becomes empty,
 * and a {@link ThreadPoolExecutor} that has been shut down rejects new submissions, so a later
 * {@link #execute} call with a non-empty task list cannot be served by the same instance.
 */
public class GraphTaskExecutor {
    ThreadExecutor executor;
    private List<Result> results;
    private List<TaskInfo> log;
    private DirectedGraph<GraphTask, DefaultEdge> graph;
    Set<GraphTask> executing;

    /**
     * Creates the executor with a fixed-size pool of {@code availableProcessors() * 4} threads,
     * a 60 second keep-alive and an unbounded work queue.
     */
    public GraphTaskExecutor() {
        executor = new ThreadExecutor(Runtime.getRuntime()
                .availableProcessors() * 4, 60,
                new LinkedBlockingQueue<Runnable>());
        results = new ArrayList<Result>();
        log = new ArrayList<TaskInfo>();
        executing = new HashSet<GraphTask>();
    }

    /**
     * Builds the task graph, runs every task and blocks until the pool has terminated.
     *
     * @param request the request being served; not read by this method
     * @param tasks   the tasks to run
     * @return the results of all tasks for which {@code isEndPoint()} is true
     */
    public List<Result> execute(Request request, List<GraphTask> tasks) {
        System.out.println("Preparing task runner. Num Tasks: " + tasks.size());
        graph = new GraphTaskBuilder(tasks).buildGraph();
        processTasks();
        awaitCompletion();
        return results;
    }

    /**
     * Waits (up to three days) for the pool to terminate and prints the collected results.
     * If the waiting thread is interrupted, the interrupt flag is restored so that callers
     * further up the stack can still observe the interruption.
     */
    private void awaitCompletion() {
        try {
            executor.awaitTermination(3, TimeUnit.DAYS);
            System.out.println("Results " + results.toString());
        } catch (InterruptedException e) {
            e.printStackTrace();
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Submits every task that has no incoming edges and is not already executing, or shuts the
     * pool down when no vertices are left.
     *
     * <p>The whole method runs under the graph lock: this method is called from pool threads
     * (via {@link #completed}) as well as from the caller of {@link #execute}, and the graph is
     * mutated under the same lock, so the emptiness check must not read it unlocked.
     */
    private void processTasks() {
        synchronized (graph) {
            if (graph.vertexSet().isEmpty()) {
                executor.shutdown();
                System.out
                        .println("All tasks completed... shutting down executor service");
                return;
            }
            Iterator<GraphTask> iter = new TopologicalOrderIterator<GraphTask, DefaultEdge>(
                    graph);
            while (iter.hasNext()) {
                GraphTask task = iter.next();
                if (graph.incomingEdgesOf(task).size() == 0
                        && !executing.contains(task)) {
                    executor.execute(task);
                    executing.add(task);
                }
            }
        }
    }

    /**
     * Called after a task has run: passes its data to the targets of its outgoing edges,
     * records its result if it is an end point, removes it from the graph and then schedules
     * whatever became runnable.
     *
     * @param t the task that has just finished
     */
    private void completed(GraphTask t) {
        System.out.println("Completed Task: " + t.getName());
        synchronized (graph) {
            for (DefaultEdge edge : graph.outgoingEdgesOf(t)) {
                GraphTask target = graph.getEdgeTarget(edge);
                target.addData(t.getData());
            }
            if (t.isEndPoint()) {
                results.add(t.getResult());
            }
            graph.removeVertex(t);
            executing.remove(t);
        }
        processTasks();
    }

    /**
     * Fixed-size pool that reports every finished task back to the enclosing executor.
     * Not static because it calls {@link GraphTaskExecutor#completed} on the outer instance.
     */
    private class ThreadExecutor extends ThreadPoolExecutor {
        public ThreadExecutor(int corePoolSize, long keepAliveSeconds,
                BlockingQueue<Runnable> workQueue) {
            super(corePoolSize, corePoolSize, keepAliveSeconds,
                    TimeUnit.SECONDS, workQueue);
        }

        @Override
        protected void afterExecute(Runnable runTask, Throwable e) {
            super.afterExecute(runTask, e);
            // Tasks are handed over with execute(), so the Runnable seen here is the
            // GraphTask itself rather than a Future wrapper.
            completed((GraphTask) runTask);
        }
    }

    /** Demo: builds a small task graph from hard-coded tasks and runs it. */
    public static void main(String arg[]) throws Exception {
        GraphTaskExecutor graphTaskExecutor = new GraphTaskExecutor();
        TaskContext context = new TaskContext();
        List<GraphTask> tasks = new ArrayList<GraphTask>();
        Request request = new Request(1);
        Set<DataType> empty = new HashSet<DataType>();
        Set<DataType> producer = new HashSet<DataType>(Arrays.asList(
                DataType.ACCT_INFO, DataType.PROJECTIONS));
        Set<DataType> consumer = new HashSet<DataType>(Arrays.asList(
                DataType.ACCT_INFO, DataType.PROJECTIONS));
        Set<DataType> accountResult = new HashSet<DataType>(
                Arrays.asList(DataType.ACCT_INFO));
        Set<DataType> projectionResult = new HashSet<DataType>(
                Arrays.asList(DataType.PROJECTIONS));
        Set<DataType> intraDayResult = new HashSet<DataType>(
                Arrays.asList(DataType.PROJECTIONS));
        tasks.add(new GraphTask(context, "1", "A", producer, empty, empty));
        tasks.add(new GraphTask(context, "2", "X", producer, consumer, empty,
                "A"));
        tasks.add(new GraphTask(context, "3", "Y", producer, consumer,
                accountResult, "A"));
        tasks.add(new GraphTask(context, "4", "B", producer, consumer, empty,
                "A"));
        tasks.add(new GraphTask(context, "5", "C", producer, consumer, empty,
                "B"));
        tasks.add(new GraphTask(context, "6", "D", producer, consumer,
                intraDayResult, "C"));
        tasks.add(new GraphTask(context, "7", "E", producer, consumer,
                projectionResult, "D", "X", "Y"));
        graphTaskExecutor.execute(request, tasks);
        System.out.println("All DONE");
    }
}
import java.util.Arrays;
import java.util.Set;
import java.util.TreeSet;
public class GraphTask extends AbstractTask {
private Set<String> dependencies = new TreeSet<String>();
public GraphTask(TaskContext context, String id, String name,
Set<DataType> produces, Set<DataType> consumes,
Set<DataType> endpoints, String... dependency) {
super(id, name, context, produces, consumes, endpoints);
dependencies.addAll(Arrays.asList(dependency));
}
public GraphTask(TaskContext context, String id, String name,
Set<DataType> produces, Set<DataType> consumes,
Set<DataType> endpoints) {
super(id, name, context, produces, consumes, endpoints);
}
public void addDependency(String dependency) {
this.dependencies.add(dependency);
}
public Data process(TaskContext context, Data data) throws TaskException {
int time = (int) (Math.random() * 10);
System.out.println("Task " + getName() + " estimated to run for "
+ time + " secs");
TaskResult result = null;
try {
Thread.sleep(time * 1000);
} catch (InterruptedException e) {
e.printStackTrace();
}
result = new TaskResult(getName());
for (DataType d: getProduces()) {
result.addData(d, d.toString());
}
return result;
}
public Set<String> getDependencies() {
return dependencies;
}
public void setDependencies(Set<String> dependencies) {
this.dependencies = dependencies;
}
}
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
public abstract class AbstractTask implements Task<Data, Result> {
private String identifier;
private String name;
private TaskContext context;
private List<Data> prevData;
private Data data;
private Set<DataType> produces;
private Set<DataType> consumes;
private Set<DataType> endpoints;
private TaskStatus status;
private LocalTime startTime;
private LocalTime endTime;
private Result result;
public AbstractTask(String id, String name, TaskContext context,
Set<DataType> produces, Set<DataType> consumes,
Set<DataType> endpoints) {
this.identifier = id;
this.name = name;
this.context = context;
this.consumes = consumes;
this.produces = produces;
this.endpoints = endpoints;
this.data = new Data();
this.prevData = new ArrayList<Data>();
this.status = TaskStatus.SUCCESS;
}
public AbstractTask(String id, String name, TaskContext context) {
this(id, name, context, new HashSet<DataType>(),
new HashSet<DataType>(), new HashSet<DataType>());
}
public String getIdentifier() {
return identifier;
}
public void setIdentifier(String identifier) {
this.identifier = identifier;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public TaskContext getContext() {
return context;
}
public void setContext(TaskContext context) {
this.context = context;
}
public boolean isEndPoint() {
return (endpoints.size() > 0);
}
private Data preProcess() throws MissingDataException,
SkippedTaskException, ErrorTaskException {
Map<TaskStatus, Data> statusData = new HashMap<TaskStatus, Data>();
Map<DataType, Object> allData = new HashMap<DataType, Object>();
// Get all data from previous results
for (Data r : prevData) {
statusData.put(r.getStatus(), r);
allData.putAll(r.getObjects());
}
Data data = new Data();
data.addData(allData);
data.setStatus(deriveTaskStatus(statusData));
switch (data.getStatus()) {
case SUCCESS:
for (DataType d : consumes) {
if (!allData.containsKey(d)) {
throw new MissingDataException("Task " + name + " Missing input data for "
+ d);
}
}
break;
case SKIPPED:
throw new SkippedTaskException("Previous Task was skipped");
case ERROR:
throw new ErrorTaskException("Previous Task failed");
}
return data;
}
private TaskStatus deriveTaskStatus(Map<TaskStatus, Data> statusData) {
if (statusData.containsKey(TaskStatus.ERROR))
return TaskStatus.ERROR;
if (statusData.containsKey(TaskStatus.SKIPPED))
return TaskStatus.SKIPPED;
return TaskStatus.SUCCESS;
}
private Result postProcess(Data outputData) throws MissingDataException {
Result result = new Result();
for (DataType d : endpoints) {
if (!outputData.getObjects().containsKey(d)) {
throw new MissingDataException("Missing end point data for " + d);
}
result.addData(d, outputData.getObject(d));
}
return result;
}
@Override
public void run() {
System.out.println("Running task: " + name);
try {
Data inputData = preProcess();
data = process(context, inputData);
result = postProcess(data);
} catch (MissingDataException | SkippedTaskException
| ErrorTaskException e) {
data = new Data(TaskStatus.SKIPPED, new Error("SKIP_TASK",
"Skip Task", e));
e.printStackTrace();
} catch (TaskException e) {
data = new Data(TaskStatus.ERROR, new Error("PREV_ERROR",
"Error in dependent task", e));
e.printStackTrace();
}
}
public abstract Data process(TaskContext context, Data data)
throws TaskException;
@Override
public void addData(Data data) {
this.prevData.add(data);
}
public TaskStatus getStatus() {
return status;
}
public void setStatus(TaskStatus status) {
this.status = status;
}
public Result getResult() {
return result;
}
public Set<DataType> getProduces() {
return produces;
}
public void setProduces(Set<DataType> produces) {
this.produces = produces;
}
public Set<DataType> getConsumes() {
return consumes;
}
public void setConsumes(Set<DataType> consumes) {
this.consumes = consumes;
}
public Data getData() {
return data;
}
@Override
public String toString() {
return "[" + name + "]";
}
@Override
public TaskInfo getTaskInfo() {
return new TaskInfo(this.identifier, this.name, this.status,
this.startTime, this.endTime);
}
public Set<DataType> getEndpoints() {
return endpoints;
}
public void setEndpoints(Set<DataType> endpoints) {
this.endpoints = endpoints;
}
}
【问题讨论】:
-
查看 Java 8 的 CompletableFuture。您可以将任务提交到 fork-join 池,并把函数串联起来,或者在任务完成时调用其他任务。您也可以将它与 Java 8 Streams 结合起来。
-
感谢@Jacek 的编辑。使用 CompletableFuture 的话,我最终可能需要在每个阶段手动编排任务,而且它可能无法成为提交任务拓扑的通用解决方案。我按照有人发布的方法编写了自己的 Graph Executor,请参考上面的示例。它可以工作,但我最终为每个请求都创建了新的 ThreadExecutor。我正在考虑是否有办法重用线程池——在最后一个任务完成时关闭执行器,使我无法做到这一点。
标签: java akka reactive-programming rx-java