python操作hdfs_python 操作hdfs
from hdfs.client import Client
# The API reference for operating HDFS from Python is on the official site:
# https://hdfscli.readthedocs.io/en/latest/api.html
# Read the contents of an HDFS file and return its lines as a list.
def read_hdfs_file(client, filename):
    """Return the stripped lines of an HDFS file.

    Opens ``filename`` through ``client.read`` with UTF-8 decoding and a
    newline delimiter, then collects every yielded line with surrounding
    whitespace removed.

    Args:
        client: Object exposing ``read(path, encoding=..., delimiter=...)``
            as a context manager that yields an iterable of text lines.
        filename: Path of the file to read.

    Returns:
        A list of the lines in order, each passed through ``str.strip()``.
        An empty file yields an empty list.
    """
    # A list literal/comprehension is used on purpose: this module defines
    # its own ``list`` function, so the builtin ``list()`` is shadowed here.
    with client.read(filename, encoding='utf-8', delimiter='\n') as reader:
        return [line.strip() for line in reader]


# Create a directory.
def mkdirs(client, hdfs_path):
    """Create a directory by calling ``client.makedirs(hdfs_path)``.

    Args:
        client: Object exposing ``makedirs(path)``.
        hdfs_path: Path of the directory to create.
    """
    client.makedirs(hdfs_path)


# Delete an HDFS file.
def delete_hdfs_file(client, hdfs_path):
    """Delete a path by calling ``client.delete(hdfs_path)``.

    Args:
        client: Object exposing ``delete(path)``.
        hdfs_path: Path to delete.
    """
    client.delete(hdfs_path)


# Upload a file to HDFS.
def put_to_hdfs(client, local_path, hdfs_path):
    """Upload a local file via ``client.upload`` with ``cleanup=True``.

    Note the argument order: this helper takes the local path first, while
    ``client.upload`` is called with the HDFS destination first.

    Args:
        client: Object exposing ``upload(hdfs_path, local_path, cleanup=...)``.
        local_path: Path of the local file to upload.
        hdfs_path: Destination path on HDFS.
    """
    client.upload(hdfs_path, local_path, cleanup=True)


# Download a file from HDFS to the local filesystem.
def get_from_hdfs(client, hdfs_path, local_path):
    """Download an HDFS path via ``client.download`` with ``overwrite=False``.

    Args:
        client: Object exposing ``download(hdfs_path, local_path, overwrite=...)``.
        hdfs_path: Source path on HDFS.
        local_path: Destination path on the local filesystem.
    """
    client.download(hdfs_path, local_path, overwrite=False)


# Append data to an HDFS file.
def append_to_hdfs(client, hdfs_path, data):
    """Append ``data`` to an HDFS file.

    Calls ``client.write`` with ``overwrite=False``, ``append=True`` and
    UTF-8 encoding.

    Args:
        client: Object exposing
            ``write(hdfs_path, data, overwrite=..., append=..., encoding=...)``.
        hdfs_path: Path of the file to append to.
        data: Content to append; it is passed through to ``client.write``.
    """
    client.write(
        hdfs_path,
        data,
        overwrite=False,
        append=True,
        encoding='utf-8',
    )


# Overwrite an HDFS file with data.
def write_to_hdfs(client, hdfs_path, data):
    """Write ``data`` to an HDFS file, replacing existing content.

    Calls ``client.write`` with ``overwrite=True``, ``append=False`` and
    UTF-8 encoding.

    Args:
        client: Object exposing
            ``write(hdfs_path, data, overwrite=..., append=..., encoding=...)``.
        hdfs_path: Path of the file to write.
        data: Content to write; it is passed through to ``client.write``.
    """
    client.write(
        hdfs_path,
        data,
        overwrite=True,
        append=False,
        encoding='utf-8',
    )


# Move or rename a file.
def move_or_rename(client, hdfs_src_path, hdfs_dst_path):
    """Move or rename a path via ``client.rename(src, dst)``.

    Args:
        client: Object exposing ``rename(src_path, dst_path)``.
        hdfs_src_path: Existing path on HDFS.
        hdfs_dst_path: New path on HDFS.
    """
    client.rename(hdfs_src_path, hdfs_dst_path)


# Return the files under a directory.
def list(client, hdfs_path):
    """Return the result of ``client.list(hdfs_path, status=False)``.

    NOTE(review): this function's name shadows the builtin ``list`` for the
    rest of the module. The name is kept so existing callers keep working;
    code in this module should use list literals/comprehensions instead of
    calling ``list(...)`` as a constructor.

    Args:
        client: Object exposing ``list(path, status=...)``.
        hdfs_path: Directory path to list.

    Returns:
        Whatever ``client.list`` returns for the given path.
    """
    return client.list(hdfs_path, status=False)


# client = Client(url, root=None, proxy=None, timeout=None, session=None)
# client = Client("http://hadoop:50070")
# Build the module-level client used by the example calls below.
client = Client(
    "http://120.78.186.82:50070/",
    root="/",
    timeout=10000,
    session=False,
)
# Alternative constructor, kept for reference:
# client = InsecureClient("http://120.78.186.82:50070", user='ann');

# Example invocations; only the upload is active, the rest are kept
# commented out for reference.
# move_or_rename(client,'/input/2.csv', '/input/emp.csv')
# read_hdfs_file(client,'/input/emp.csv')
put_to_hdfs(client, 'D:\\bbb.txt', '/file')
# append_to_hdfs(client,'/input/emp.csv','我愛你'+'\n')
# write_to_hdfs(client, '/emp.csv', "sadfafdadsf")
# read_hdfs_file(client,'/input/emp.csv')
# move_or_rename(client,'/input/emp.csv', '/input/2.csv')
# mkdirs(client,'/input/python')
# print(list(client, '/'))
# NOTE(review): no `chown` helper is visible in this file - confirm before enabling.
# chown(client,'/input/1.csv', 'root')
總結
以上是生活随笔為你收集整理的python操作hdfs_python 操作hdfs的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 安居客检测到网页抓取_原创内容不收录 解
- 下一篇: timer定时器_拾遗Timer定时器