python调用api接口获取数据_python批量爬取NCBI基因注释并调用谷歌API批量翻译
生活随笔
收集整理的這篇文章主要介紹了
python调用api接口获取数据_python批量爬取NCBI基因注释并调用谷歌API批量翻译
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
作者:沙雕學習小組
這里有視頻教程:https://www.bilibili.com/video/av87724182
今天想實現這個功能:
差異分析得到了200多個基因(甚至更多)
我要一個一個把基因的summary信息得到,要手動一個一個查可能要查到下個星期,周五就要匯報了啊……!
2
有python怕啥?!不要慌
動手之前先動腦
step1:獲取這個基因在NCBI上的summary信息——輸入gene.txt,得到genesummary.txt
step2:檢查輸出文件是否有空行,若有則刪掉——輸入genesummary.txt,得到newsummary.txt
step3:批量翻譯——輸入newsummary.txt,得到genetrans.txt
step1:獲取這個基因在NCBI上的summary信息——輸入gene.txt得到genesummary.txt
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Abao
"""Step 1: fetch the NCBI Gene summary for every gene symbol in a text file.

The input file holds one gene symbol per line below a header row.  Each
symbol is searched in the NCBI ``gene`` database (restricted to Homo
sapiens) and the symbol plus its summary are appended to the output file,
one per line.
"""
import re

# Change the file paths and names here; absolute paths are recommended.
output_file = 'genesummary.txt'  # output file
# Input file: the de-duplicated gene list (save the gene column as txt,
# e.g. `uniq *.txt > gene_list.txt`).
input_file = 'gene.txt'

# Contact address sent to NCBI with every request; replace it with your own.
ENTREZ_EMAIL = "shinningbzw@foxmail.com"

# Strips bracketed notes such as "[provided by RefSeq, Jul 2008]".
# The brackets must be escaped: an unescaped '[.*?]' is a character class
# that would delete every '.', '*' and '?' from the summary instead.
rm_pattern = re.compile(r'\[.*?\]')

GOOGLE_TRANSLATE_URL = (
    'http://translate.google.cn/translate_a/single?client=t&sl=en'
    '&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss'
    '&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2'
)
MAX_QUERY_LENGTH = 4891


def google_translate(content):
    """Translate English text to Simplified Chinese (optional in this step).

    Args:
        content: English text to translate.

    Returns:
        The translated text, or None (after printing "too long!!!") when
        *content* is longer than MAX_QUERY_LENGTH characters.
    """
    # Imported lazily: translation is switched off by default in this step,
    # so fetching summaries must not require requests or Pytrans.py.
    import requests
    from Pytrans import Pytrans

    if len(content) > MAX_QUERY_LENGTH:
        print("too long!!!")
        return None
    param = {'tk': Pytrans().getTk(content), 'q': content}
    result = requests.get(GOOGLE_TRANSLATE_URL, params=param, timeout=30)
    # Element 0 of the JSON reply is iterated as a list of segments whose
    # first item is the translated text (or None); treat a missing list as
    # "no segments".
    segments = result.json()[0] or []
    return ''.join(seg[0] for seg in segments if seg[0] is not None)


def _read_gene_symbols(path):
    """Return the gene symbols listed in *path*, without header or blanks."""
    with open(path, 'r') as fh:
        rows = [row.strip() for row in fh]
    # The first row is the column header and is always dropped, as in the
    # original script; a repeated "基因" header and empty rows are skipped too.
    return [row for row in rows[1:] if row and row != "基因"]


def fetch_gene_summary(gene):
    """Look up *gene* in NCBI Gene (Homo sapiens) and return its summary.

    Args:
        gene: Gene symbol to search for in the [Gene Name] field.

    Returns:
        The ``Summary`` field of the first matching record, or None when the
        search returns no IDs.
    """
    # Imported lazily so the file-handling code can be used without Biopython.
    from Bio import Entrez  # pip install biopython

    Entrez.email = ENTREZ_EMAIL
    gene_term = "(" + gene + "[Gene Name]) AND Homo sapiens[Organism]"
    handle = Entrez.esearch(db="gene", term=gene_term)
    try:
        id_list = Entrez.read(handle)['IdList']
    finally:
        handle.close()
    if not id_list:
        return None
    sum_handle = Entrez.esummary(db="gene", id=id_list[0])
    try:
        sum_record = Entrez.read(sum_handle)
    finally:
        sum_handle.close()
    return sum_record['DocumentSummarySet']['DocumentSummary'][0]['Summary']


def main(input_path=input_file, output_path=output_file,
         fetch_summary=fetch_gene_summary):
    """Append "<symbol>" and "<summary>" lines for every gene to *output_path*.

    Args:
        input_path: Gene list file (header row plus one symbol per line).
        output_path: File the results are appended to (UTF-8).
        fetch_summary: Callable mapping a symbol to its summary text, or to
            None when nothing was found.
    """
    genes = _read_gene_symbols(input_path)
    total = len(genes)
    print("Waiting...")
    with open(output_path, 'a+', encoding='utf-8') as out:
        for done, gene in enumerate(genes, start=1):
            raw_summary = fetch_summary(gene)
            if raw_summary is None:
                # One unknown symbol must not abort the whole batch.
                print("No NCBI Gene record found for " + gene + "; skipped")
            else:
                gene_sum = rm_pattern.sub('', raw_summary)
                # To translate in the same pass, also write
                # google_translate(gene_sum) + "\n" here.
                out.write(gene + "\n" + gene_sum + "\n")
            print("Completed " + str(int(done / total * 100)) + "%")


if __name__ == "__main__":
    main()

# Step 2: check the output file for blank lines and delete any that are found.
# Input: genesummary.txt; output: newsummary.txt.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: cici
"""Step 2: copy genesummary.txt to newsummary.txt without its blank lines."""


def remove_blank_lines(src_path='genesummary.txt', dst_path='newsummary.txt'):
    """Copy *src_path* to *dst_path*, dropping whitespace-only lines.

    Args:
        src_path: UTF-8 text file to read.  Change the path here if your
            file lives elsewhere, and double-check the file name.
        dst_path: UTF-8 text file to create or overwrite.
    """
    with open(src_path, 'r', encoding='utf-8') as fr, \
            open(dst_path, 'w', encoding='utf-8') as fd:
        # str.split() returns an empty list for empty or whitespace-only
        # lines, so only lines with real content are kept.  Iterating the
        # file object streams it instead of loading every line at once.
        fd.writelines(text for text in fr if text.split())


if __name__ == "__main__":
    remove_blank_lines()
    print('輸出成功....')

# Step 3: batch translation.
這里先寫個函數
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Topshi
"""Pytrans: compute the ``tk`` value sent with Google Translate requests.

The token algorithm is kept as JavaScript and evaluated through PyExecJS,
so a JavaScript runtime must be available to ``execjs``.
"""
import execjs

# JavaScript source defining TL(text), which returns the token string, and
# its helper RL(a, b).  The adjacent literals are concatenated without any
# separator, so the script is character-for-character the original one-line
# source; do not insert line breaks or reformat it.
_TOKEN_JS = (
    'function TL(a) {'
    'var k = "";'
    'var b = 406644;'
    'var b1 = 3293161072;'
    'var jd = ".";'
    'var $b = "+-a^+6";'
    'var Zb = "+-3^+b+-f";'
    'for (var e = [], f = 0, g = 0; g < a.length; g++) {'
    'var m = a.charCodeAt(g);'
    '128 > m ? e[f++] = m : (2048 > m ? e[f++] = m >> 6 | 192 : '
    '(55296 == (m & 64512) && g + 1 < a.length && '
    '56320 == (a.charCodeAt(g + 1) & 64512) ? '
    '(m = 65536 + ((m & 1023) << 10) + (a.charCodeAt(++g) & 1023),'
    'e[f++] = m >> 18 | 240,'
    'e[f++] = m >> 12 & 63 | 128) : e[f++] = m >> 12 | 224,'
    'e[f++] = m >> 6 & 63 | 128),'
    'e[f++] = m & 63 | 128)}'
    'a = b;'
    'for (f = 0; f < e.length; f++) a += e[f],a = RL(a, $b);'
    'a = RL(a, Zb);'
    'a ^= b1 || 0;'
    '0 > a && (a = (a & 2147483647) + 2147483648);'
    'a %= 1E6;'
    'return a.toString() + jd + (a ^ b)};'
    'function RL(a, b) {'
    'var t = "a";'
    'var Yb = "+";'
    'for (var c = 0; c < b.length - 2; c += 3) {'
    'var d = b.charAt(c + 2),'
    'd = d >= t ? d.charCodeAt(0) - 87 : Number(d),'
    'd = b.charAt(c + 1) == Yb ? a >>> d: a << d;'
    'a = b.charAt(c) == Yb ? a + d & 4294967295 : a ^ d}'
    'return a}'
)


class Pytrans:
    """Wrapper around the compiled JavaScript token function."""

    def __init__(self):
        # Compile once per instance; getTk() reuses the compiled context.
        self.ctx = execjs.compile(_TOKEN_JS)

    def getTk(self, text):
        """Return the result of the JavaScript ``TL`` function for *text*.

        Args:
            text: The text that is about to be translated.

        Returns:
            Whatever ``TL`` returns; the script builds it as
            ``"<number>.<number>"``.
        """
        return self.ctx.call("TL", text)

# Call this class from the next script: input newsummary.txt, output genetrans.txt.
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author: Topshi
"""Step 3: translate newsummary.txt line by line into genetrans.txt."""

GOOGLE_TRANSLATE_URL = (
    'http://translate.google.cn/translate_a/single?client=t&sl=en'
    '&tl=zh-CN&hl=zh-CN&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss'
    '&dt=t&ie=UTF-8&oe=UTF-8&clearbtn=1&otf=1&pc=1&srcrom=0&ssel=0&tsel=0&kc=2'
)
MAX_QUERY_LENGTH = 4891


def google_translate(content):
    """Translate English text to Simplified Chinese.

    Args:
        content: English text to translate.

    Returns:
        The translated text, or None (after printing "too long!!!") when
        *content* is longer than MAX_QUERY_LENGTH characters.
    """
    # Imported lazily so the file-handling code in main() can be imported
    # without requests, PyExecJS or a JavaScript runtime being installed.
    import requests
    from Pytrans import Pytrans

    if len(content) > MAX_QUERY_LENGTH:
        print("too long!!!")
        return None
    param = {'tk': Pytrans().getTk(content), 'q': content}
    result = requests.get(GOOGLE_TRANSLATE_URL, params=param, timeout=30)
    # Element 0 of the JSON reply is iterated as a list of segments whose
    # first item is the translated text (or None); treat a missing list as
    # "no segments".
    segments = result.json()[0] or []
    return ''.join(seg[0] for seg in segments if seg[0] is not None)


def main(input_path='newsummary.txt', output_path='genetrans.txt',
         translate=google_translate):
    """Translate every line of *input_path* and append it to *output_path*.

    Output lines stay aligned with input lines: a blank input line yields a
    blank output line, and a line the translator rejects (returns None for)
    is written untranslated.

    Args:
        input_path: UTF-8 text file with one entry per line.
        output_path: UTF-8 file the translations are appended to.
        translate: Callable mapping a string to its translation or None.
    """
    # The file is produced as UTF-8 by step 2, so read it as UTF-8 rather
    # than with the platform default encoding.
    with open(input_path, 'r', encoding='utf-8') as f:
        genotype_annotation_list = [element.strip() for element in f]
    total = len(genotype_annotation_list)
    with open(output_path, 'a+', encoding='utf-8') as translate_file:
        for count, ga in enumerate(genotype_annotation_list, start=1):
            # Blank lines used to crash the translation call; pass them
            # through unchanged instead.
            translation = translate(ga) if ga else ''
            if translation is None:
                translation = ga
            # To keep the source next to its translation, write
            # ga + '\t' + translation + '\n' instead.
            translate_file.write(translation + '\n')
            print('complete', '%.1f%%' % ((count / total) * 100))


if __name__ == "__main__":
    a = google_translate("hello,Input file will be translated, please be patient")
    print(a)
    main()

# Article sign-off: follow us for more.
總結
以上是生活随笔為你收集整理的python调用api接口获取数据_python批量爬取NCBI基因注释并调用谷歌API批量翻译的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 简单记录:如何用CSS3做出对勾 √ 的
- 下一篇: Walkway.js – 用线条制作简约