【发布时间】:2017-02-20 02:39:10
【问题描述】:
我收到多火花上下文错误。
谁能帮我解决这个问题?
如果我选择parsing.take(1),它运行良好。但是当我在代码的最后一行执行take > 2 时,它会给出多个火花上下文错误。
非常感谢任何帮助
from pyspark import SparkConf
from pyspark import SparkContext
sc = SparkContext()
from pyspark.sql import SQLContext
sqlContext = SQLContext(sc)
############ IRIS DataSet ##############
iris= sc.textFile("hdfs:///user/edureka/IRIS.csv")
testset,trainingset = iris.randomSplit([1,2])
import numpy as np
def parse_interaction(line):
line_split = line.split(",")
# keep just numeric and logical values
symbolic_indexes = [4] # Specify the columns which has the String values
features = [item for i,item in enumerate(line_split) if i not in symbolic_indexes]
return np.array([float(x) for x in features])
def parse_interaction_label(line):
line_split = line.split(",")
# keep just numeric and logical values
symbolic_indexes = [4] # Specify the columns which has the String values
label = [item for i,item in enumerate(line_split) if i in symbolic_indexes]
return np.array([float(x) for x in label])
features_train = trainingset.map(parse_interaction)
labels_train = trainingset.map(parse_interaction_label)
features_test=testset.map(parse_interaction)
labels_test=testset.map(parse_interaction_label)
def parse_interaction_with_key(line):
line_split = line.split(",")
# keep just numeric and logical values
#symbolic_indexes = [4] # Specify the columns which has the String values
features_label = [item for i,item in enumerate(line_split)]
return (np.array([float(x) for x in features_label]))
features_train_label = trainingset.map(parse_interaction_with_key)
features_test_label= testset.map(parse_interaction_with_key)
product=features_train_label.cartesian(features_test_label)
import math
def distancecal(line):
training_label=line[0]
training=training_label[0:4] # hardcoded the Training Column
train_label = training_label[-1]
testing_label=line[1]
test=testing_label[0:4] # Hardcoded the Testing column Modified the Testing Column
stringtest=str(line[1])
points=zip(training,test)
diffs_squared_distance = [pow(a - b, 2) for (a, b) in points]
score = math.sqrt(sum(diffs_squared_distance))
training_label = np.append(training_label,score)
return (stringtest,training_label)
training_label_test_score = product.map(distancecal)
keyvalue=training_label_test_score.groupByKey().mapValues(list)
def sortingvalue(l):
from pyspark import SparkConf
from pyspark import SparkContext
#conf1 = SparkConf()
#conf1.setAppName('Sorting Job Stream')
#conf1.set("spark.ui.port", "36000")
#conf1.set("spark.driver.allowMultipleContexts", "true")
sc1 = SparkContext()
v = sc1.parallelize(l)
vSorted = v.sortBy(lambda a: a[5])
return(vSorted.collect())
def parsekeyvalueforsorting(line):
key=line[0]
cdata=line[1]
scdata=sortingvalue(cdata)
return (key,scdata)
parsing=keyvalue.map(parsekeyvalueforsorting)
print(parsing.take(2))
这是我的列表,第一个元素是字符串,第二个是数组列表:
[('[ 0.2 1.4 3.4 5.2 0. ]', [array([ 0.2, 1.4, 3. , 4.9, 0. , **0.5**]), array([ 0.2 , 1.3 , 3.2 , 4.7 , 0. ,**0.54772256**]), array([ 0.2 , 1.4 , 3.6 , 5. , 0. ,
0.28284271]), array([ 0.4 , 1.7 , 3.9 , 5.4 , 0. ,
0.64807407]), array([ 0.2 , 1.5 , 3.4 , 5. , 0. ,
0.2236068]), array([ 0.2 , 1.4 , 2.9 , 4.4 , 0. ,
0.94339811]), array([ 0.1 , 1.5 , 3.1 , 4.9 , 0. ,
0.4472136]), array([ 0.2 , 1.5 , 3.7 , 5.4 , 0. ,
0.37416574]), array([ 0.2 , 1.6 , 3.4 , 4.8 , 0. ,
0.4472136]), array([ 0.1 , 1.4 , 3. , 4.8 , 0. ,
0.57445626]), array([ 0.1 , 1.1 , 3. , 4.3 , 0. ,
1.03440804]), array([ 0.4 , 1.5 , 4.4 , 5.7 , 0. ,
1.14017543]), array([ 0.4 , 1.3 , 3.9 , 5.4 , 0. ,
0.58309519]), array([ 0.3 , 1.7 , 3.8 , 5.7 , 0. ,
0.71414284]), array([ 0.3 , 1.5 , 3.8 , 5.1 , 0. ,
0.43588989]), array([ 0.2 , 1.7 , 3.4 , 5.4 , 0. ,
0.36055513]), array([ 0.4 , 1.5 , 3.7 , 5.1 , 0. ,
0.38729833]), array([ 0.2 , 1. , 3.6 , 4.6 , 0. ,
0.74833148]), array([ 0.5 , 1.7 , 3.3 , 5.1 , 0. ,
0.4472136]), array([ 0.2 , 1.9 , 3.4 , 4.8 , 0. ,
0.64031242]), array([ 0.2 , 1.6 , 3. , 5. , 0. ,
0.48989795]), array([ 0.4 , 1.6 , 3.4 , 5. , 0. ,
0.34641016]), array([ 0.2 , 1.5 , 3.5 , 5.2 , 0. ,
0.14142136]), array([ 0.4, 1.5, 3.4, 5.4, 0. , 0.3]), array([ 0.2 , 1.5 , 3.1 , 4.9 , 0. ,
0.43588989]), array([ 0.2 , 1.2 , 3.2 , 5. , 0. ,
0.34641016]), array([ 0.2 , 1.3 , 3.5 , 5.5 , 0. ,
0.33166248]), array([ 0.2 , 1.5 , 3.4 , 5.1 , 0. ,
0.14142136]), array([ 0.3 , 1.3 , 2.3 , 4.5 , 0. ,
1.3114877]), array([ 0.4 , 1.9 , 3.8 , 5.1 , 0. , 0.678233]), array([ 0.3 , 1.4 , 3. , 4.8 , 0. ,
0.57445626]), array([ 0.2 , 1.6 , 3.8 , 5.1 , 0. ,
0.45825757]), array([ 0.2 , 1.4 , 3.2 , 4.6 , 0. ,
0.63245553]), array([ 0.2 , 1.5 , 3.7 , 5.3 , 0. ,
0.33166248]), array([ 0.2 , 1.4 , 3.3 , 5. , 0. ,
0.2236068]), array([ 1.3 , 4. , 2.3 , 5.5 , 1. ,
3.04466747]), array([ 1.5 , 4.6 , 2.8 , 6.5 , 1. ,
3.73898382]), array([ 1.3 , 4.6 , 2.9 , 6.6 , 1. ,
3.69594372]), array([ 1.4 , 3.9 , 2.7 , 5.2 , 1. ,
2.86006993]), array([ 1.5 , 4.2 , 3. , 5.9 , 1. ,
3.19061123]), array([ 1. , 4. , 2.2 , 6. , 1. ,
3.07896086]), array([ 1.3 , 3.6 , 2.9 , 5.6 , 1. ,
2.54165301]), array([ 1.5 , 4.5 , 3. , 5.6 , 1. ,
3.40881211]), array([ 1. , 4.1 , 2.7 , 5.8 , 1. ,
2.96310648]), array([ 1.5 , 4.5 , 2.2 , 6.2 , 1. ,
3.7067506]), array([ 1.3 , 4. , 2.8 , 6.1 , 1. ,
3.02324329]), array([ 1.5 , 4.9 , 2.5 , 6.3 , 1. ,
3.99499687]), array([ 1.2 , 4.7 , 2.8 , 6.1 , 1. ,
3.6138622]), array([ 1.3 , 4.3 , 2.9 , 6.4 , 1. ,
3.36303434]), array([ 1.4 , 4.8 , 2.8 , 6.8 , 1. ,
3.98998747]), array([ 1.7 , 5. , 3. , 6.7 , 1. ,
4.19761837]), array([ 1.5 , 4.5 , 2.9 , 6. , 1. ,
3.49141805]), array([ 1. , 3.5 , 2.6 , 5.7 , 1. ,
2.43721152]), array([ 1.1 , 3.8 , 2.4 , 5.5 , 1. ,
2.7676705]), array([ 1. , 3.7 , 2.4 , 5.5 , 1. ,
2.64952826]), array([ 1.2 , 3.9 , 2.7 , 5.8 , 1. ,
2.84604989]), array([ 1.6 , 5.1 , 2.7 , 6. , 1. ,
4.09633983]), array([ 1.5 , 4.5 , 3. , 5.4 , 1. ,
3.39116499]), array([ 1.3 , 4.4 , 2.3 , 6.3 , 1. ,
3.55387113]), array([ 1.3 , 4. , 2.5 , 5.5 , 1. ,
2.97825452]), array([ 1.2 , 4.4 , 2.6 , 5.5 , 1. ,
3.27566787]), array([ 1. , 3.3 , 2.3 , 5. , 1. ,
2.34520788]), array([ 1.3 , 4.2 , 2.7 , 5.6 , 1. ,
3.1144823]), array([ 1.2 , 4.2 , 3. , 5.7 , 1. ,
3.04138127]), array([ 1.3 , 4.3 , 2.9 , 6.2 , 1. ,
3.2969683]), array([ 2.5 , 6. , 3.3 , 6.3 , 2. ,
5.26022813]), array([ 1.9 , 5.1 , 2.7 , 5.8 , 2. ,
4.17492515]), array([ 2.1 , 5.9 , 3. , 7.1 , 2. ,
5.25642464]), array([ 1.8 , 5.6 , 2.9 , 6.3 , 2. ,
4.65403051]), array([ 2.2 , 5.8 , 3. , 6.5 , 2. ,
5.02095608]), array([ 1.8 , 6.3 , 2.9 , 7.3 , 2. ,
5.5883808]), array([ 1.8 , 5.8 , 2.5 , 6.7 , 2. ,
4.9979996]), array([ 2.5 , 6.1 , 3.6 , 7.2 , 2. ,
5.60535458]), array([ 2. , 5.1 , 3.2 , 6.5 , 2. ,
4.31972221]), array([ 1.9 , 5.3 , 2.7 , 6.4 , 2. ,
4.4754888]), array([ 2.1 , 5.5 , 3. , 6.8 , 2. ,
4.81040539]), array([ 2. , 5. , 2.5 , 5.7 , 2. ,
4.15451562]), array([ 1.8 , 5.5 , 3. , 6.5 , 2. ,
4.60651712]), array([ 2.2 , 6.7 , 3.8 , 7.7 , 2. ,
6.20483682]), array([ 2.3 , 6.9 , 2.6 , 7.7 , 2. ,
6.44592895]), array([ 1.5 , 5. , 2.2 , 6. , 2. ,
4.09023227]), array([ 2. , 4.9 , 2.8 , 5.6 , 2. ,
4.0012498]), array([ 1.8 , 4.9 , 2.7 , 6.3 , 2. ,
4.06324993]), array([ 1.8 , 6. , 3.2 , 7.2 , 2. ,
5.26877595]), array([ 1.6 , 5.8 , 3. , 7.2 , 2. ,
5.04777179]), array([ 2. , 6.4 , 3.8 , 7.9 , 2. ,
5.97411081]), array([ 2.2 , 5.6 , 2.8 , 6.4 , 2. ,
4.84148737]), array([ 1.5 , 5.1 , 2.8 , 6.3 , 2. ,
4.11703777]), array([ 2.3 , 6.1 , 3. , 7.7 , 2. ,
5.7367238]), array([ 2.4 , 5.6 , 3.4 , 6.3 , 2. ,
4.86723741]), array([ 1.8 , 5.5 , 3.1 , 6.4 , 2. ,
4.57165178]), array([ 2.4 , 5.6 , 3.1 , 6.7 , 2. ,
4.98196748]), array([ 2.3 , 5.1 , 3.1 , 6.9 , 2. ,
4.59129611]), array([ 2.3 , 5.9 , 3.2 , 6.8 , 2. ,
5.22111099]), array([ 2.5 , 5.7 , 3.3 , 6.7 , 2. ,
5.10294033]), array([ 2.3 , 5.2 , 3. , 6.7 , 2. ,
4.61085675]), array([ 1.9 , 5. , 2.5 , 6.3 , 2. ,
4.22729228]), array([ 2.3 , 5.4 , 3.4 , 6.2 , 2. ,
4.62709412]), array([ 1.8 , 5.1 , 3. , 5.9 , 2. ,
4.11096096])]), ('[ 0.3 1.4 3.4 4.6 0. ]', [array([ 0.2 , 1.4 , 3. , 4.9 , 0. ,
0.50990195]), array([ 0.2 , 1.3 , 3.2 , 4.7 , 0. ,
0.26457513]), array([ 0.2 , 1.4 , 3.6 , 5. , 0. ,
0.45825757]), array([ 0.4 , 1.7 , 3.9 , 5.4 , 0. ,
0.99498744]), array([ 0.2 , 1.5 , 3.4 , 5. , 0. ,
0.42426407]), array([ 0.2 , 1.4 , 2.9 , 4.4 , 0. ,
0.54772256]), array([ 0.1 , 1.5 , 3.1 , 4.9 , 0. ,
0.47958315]), array([ 0.2 , 1.5 , 3.7 , 5.4 , 0. ,
0.8660254]), array([ 0.2, 1.6, 3.4, 4.8, 0. , 0.3]), array([ 0.1 , 1.4 , 3. , 4.8 , 0. ,
0.48989795]), array([ 0.1 , 1.1 , 3. , 4.3 , 0. ,
0.6164414]), array([ 0.4 , 1.5 , 4.4 , 5.7 , 0. ,
1.49331845]), array([ 0.4 , 1.3 , 3.9 , 5.4 , 0. ,
0.9539392]), array([ 0.3 , 1.7 , 3.8 , 5.7 , 0. ,
1.2083046]), array([ 0.3 , 1.5 , 3.8 , 5.1 , 0. ,
0.64807407]), array([ 0.2 , 1.7 , 3.4 , 5.4 , 0. ,
0.86023253]), array([ 0.4, 1.5, 3.7, 5.1, 0. , 0.6]), array([ 0.2 , 1. , 3.6 , 4.6 , 0. ,
0.45825757]), array([ 0.5 , 1.7 , 3.3 , 5.1 , 0. ,
0.6244998]), array([ 0.2 , 1.9 , 3.4 , 4.8 , 0. ,
0.54772256]), array([ 0.2 , 1.6 , 3. , 5. , 0. ,
0.60827625]), array([ 0.4 , 1.6 , 3.4 , 5. , 0. ,
0.45825757]), array([ 0.2 , 1.5 , 3.5 , 5.2 , 0. ,
0.6244998]), array([ 0.4 , 1.5 , 3.4 , 5.4 , 0. ,
0.81240384]), array([ 0.2 , 1.5 , 3.1 , 4.9 , 0. ,
0.4472136]), array([ 0.2, 1.2, 3.2, 5. , 0. , 0.5]), array([ 0.2 , 1.3 , 3.5 , 5.5 , 0. ,
0.91651514]), array([ 0.2 , 1.5 , 3.4 , 5.1 , 0. ,
0.51961524]), array([ 0.3 , 1.3 , 2.3 , 4.5 , 0. ,
1.10905365]), array([ 0.4 , 1.9 , 3.8 , 5.1 , 0. ,
0.81853528]), array([ 0.3 , 1.4 , 3. , 4.8 , 0. ,
0.4472136]), array([ 0.2 , 1.6 , 3.8 , 5.1 , 0. , 0.678233]), array([ 0.2 , 1.4 , 3.2 , 4.6 , 0. ,
0.2236068]), array([ 0.2 , 1.5 , 3.7 , 5.3 , 0. ,
0.77459667]), array([ 0.2 , 1.4 , 3.3 , 5. , 0. ,
0.42426407]), array([ 1.3 , 4. , 2.3 , 5.5 , 1. ,
3.12729915]), array([ 1.5 , 4.6 , 2.8 , 6.5 , 1. ,
3.95600809]), array([ 1.3 , 4.6 , 2.9 , 6.6 , 1. ,
3.93573373]), array([ 1.4 , 3.9 , 2.7 , 5.2 , 1. ,
2.88270706]), array([ 1.5 , 4.2 , 3. , 5.9 , 1. ,
3.33616546]), array([ 1. , 4. , 2.2 , 6. , 1. ,
3.26343377]), array([ 1.3 , 3.6 , 2.9 , 5.6 , 1. ,
2.66270539]), array([ 1.5 , 4.5 , 3. , 5.6 , 1. ,
3.49428104]), array([ 1. , 4.1 , 2.7 , 5.8 , 1. ,
3.11608729]), array([ 1.5 , 4.5 , 2.2 , 6.2 , 1. ,
3.87943295]), array([ 1.3 , 4. , 2.8 , 6.1 , 1. ,
3.22024844]), array([ 1.5 , 4.9 , 2.5 , 6.3 , 1. ,
4.17013189]), array([ 1.2 , 4.7 , 2.8 , 6.1 , 1. ,
3.78285606]), array([ 1.3 , 4.3 , 2.9 , 6.4 , 1. , 3.591657]), array([ 1.4 , 4.8 , 2.8 , 6.8 , 1. ,
4.23910368]), array([ 1.7 , 5. , 3. , 6.7 , 1. ,
4.41474801]), array([ 1.5 , 4.5 , 2.9 , 6. , 1. ,
3.64142829]), array([ 1. , 3.5 , 2.6 , 5.7 , 1. ,
2.59807621]), array([ 1.1 , 3.8 , 2.4 , 5.5 , 1. ,
2.86530976]), array([ 1. , 3.7 , 2.4 , 5.5 , 1. ,
2.75499546]), array([ 1.2 , 3.9 , 2.7 , 5.8 , 1. ,
2.99833287]), array([ 1.6 , 5.1 , 2.7 , 6. , 1. ,
4.22255847]), array([ 1.5 , 4.5 , 3. , 5.4 , 1. ,
3.4423829]), array([ 1.3 , 4.4 , 2.3 , 6.3 , 1. ,
3.75499667]), array([ 1.3 , 4. , 2.5 , 5.5 , 1. ,
3.06267857]), array([ 1.2 , 4.4 , 2.6 , 5.5 , 1. ,
3.35559235]), array([ 1. , 3.3 , 2.3 , 5. , 1. ,
2.33880311]), array([ 1.3 , 4.2 , 2.7 , 5.6 , 1. ,
3.21403174]), array([ 1.2 , 4.2 , 3. , 5.7 , 1. ,
3.16543836]), array([ 1.3 , 4.3 , 2.9 , 6.2 , 1. ,
3.49571166]), array([ 2.5 , 6. , 3.3 , 6.3 , 2. ,
5.37587202]), array([ 1.9 , 5.1 , 2.7 , 5.8 , 2. ,
4.26380112]), array([ 2.1 , 5.9 , 3. , 7.1 , 2. ,
5.46808925]), array([ 1.8 , 5.6 , 2.9 , 6.3 , 2. ,
4.79895822]), array([ 2.2 , 5.8 , 3. , 6.5 , 2. ,
5.17107339]), array([ 1.8 , 6.3 , 2.9 , 7.3 , 2. ,
5.81377674]), array([ 1.8 , 5.8 , 2.5 , 6.7 , 2. ,
5.17976833]), array([ 2.5 , 6.1 , 3.6 , 7.2 , 2. ,
5.80775344]), array([ 2. , 5.1 , 3.2 , 6.5 , 2. ,
4.49777723]), array([ 1.9 , 5.3 , 2.7 , 6.4 , 2. ,
4.63680925]), array([ 2.1 , 5.5 , 3. , 6.8 , 2. ,
5.0049975]), array([ 2. , 5. , 2.5 , 5.7 , 2. ,
4.22729228]), array([ 1.8 , 5.5 , 3. , 6.5 , 2. ,
4.77807493]), array([ 2.2 , 6.7 , 3.8 , 7.7 , 2. ,
6.43972049]), array([ 2.3 , 6.9 , 2.6 , 7.7 , 2. ,
6.67083203]), array([ 1.5 , 5. , 2.2 , 6. , 2. ,
4.21900462]), array([ 2. , 4.9 , 2.8 , 5.6 , 2. ,
4.0620192]), array([ 1.8 , 4.9 , 2.7 , 6.3 , 2. ,
4.2284749]), array([ 1.8 , 6. , 3.2 , 7.2 , 2. ,
5.49636243]), array([ 1.6 , 5.8 , 3. , 7.2 , 2. ,
5.28866713]), array([ 2. , 6.4 , 3.8 , 7.9 , 2. ,
6.2401923]), array([ 2.2 , 5.6 , 2.8 , 6.4 , 2. ,
4.98497743]), array([ 1.5 , 5.1 , 2.8 , 6.3 , 2. ,
4.28719022]), array([ 2.3 , 6.1 , 3. , 7.7 , 2. ,
5.98832197]), array([ 2.4 , 5.6 , 3.4 , 6.3 , 2. ,
4.9939964]), array([ 1.8 , 5.5 , 3.1 , 6.4 , 2. ,
4.73180727]), array([ 2.4 , 5.6 , 3.1 , 6.7 , 2. ,
5.15266921]), array([ 2.3 , 5.1 , 3.1 , 6.9 , 2. ,
4.80312398]), array([ 2.3 , 5.9 , 3.2 , 6.8 , 2. ,
5.39722151]), array([ 2.5 , 5.7 , 3.3 , 6.7 , 2. ,
5.26782688]), array([ 2.3 , 5.2 , 3. , 6.7 , 2. ,
4.79687398]), array([ 1.9 , 5. , 2.5 , 6.3 , 2. ,
4.38406204]), array([ 2.3 , 5.4 , 3.4 , 6.2 , 2. ,
4.74973683]), array([ 1.8 , 5.1 , 3. , 5.9 , 2. ,
4.21781934])])]
我需要根据所有列表中的粗体值(第 5 个元素)对数组的第二个元素进行排序
【问题讨论】:
标签: apache-spark pyspark spark-streaming