Edit the kernel spec
mkdir -p ~/.ipython/kernels/pyspark
vim ~/.ipython/kernels/pyspark/kernel.json
Contents of kernel.json:
{
  "display_name": "pySpark",
  "language": "python",
  "argv": [
    "/var/local/anaconda2/bin/python",
    "-m",
    "IPython.kernel",
    "-f",
    "{connection_file}"
  ],
  "env": {
    "JAVA_HOME": "/opt/jdk8",
    "SPARK_HOME": "/usr/hdp/3.0.1.0-187/spark2",
    "PYTHONPATH": "/usr/hdp/3.0.1.0-187/spark2/python:/usr/hdp/3.0.1.0-187/spark2/python/lib/py4j-0.10.7-src.zip",
    "PYTHONSTARTUP": "/usr/hdp/3.0.1.0-187/spark2/python/pyspark/shell.py",
    "PYSPARK_SUBMIT_ARGS": "pyspark-shell"
  }
}
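After restarting the notebook and selecting the pySpark kernel, a quick sanity check (a minimal sketch; the variable names are the ones from the env block above) confirms that the kernel.json environment entries reached the interpreter:

import os

# Each value should match the corresponding entry in kernel.json's "env" block.
for var in ("JAVA_HOME", "SPARK_HOME", "PYTHONPATH"):
    print("%s=%s" % (var, os.environ.get(var)))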
Verification
import os
# The environment could also be set here instead of in kernel.json:
# os.environ['SPARK_HOME'] = '/usr/hdp/3.0.1.0-187/spark2/'
# os.environ['JAVA_HOME'] = '/opt/jdk8'
from pyspark import SparkContext, SparkConf

# Spark config: name the application and point it at the standalone master.
conf = SparkConf().setAppName("testspark").setMaster("spark://10.244.0.29:7077")
sc = SparkContext(conf=conf)

# Classic word count against a file on HDFS.
text_file = sc.textFile("hdfs:///root/test/spark/test.txt")
counts = text_file.flatMap(lambda line: line.split(" ")) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b)
# collect() triggers the job and returns the (word, count) pairs to the driver.
print(counts.collect())
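collect() pulls the entire result to the driver, which is fine for a small test file. For anything larger, a bounded action is safer; this sketch (assuming the counts RDD and sc from above) prints only the ten most frequent words and then shuts the application down:

# Fetch only the top ten words by count instead of the whole result.
top = counts.takeOrdered(10, key=lambda pair: -pair[1])
for word, count in top:
    print("%s: %d" % (word, count))

sc.stop()  # release the executors once the test is done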