将QuickDraw数据集ndjson转为png图片
引言
Quick Draw Dataset 是一個包含345 個類別的 5000 萬幅繪圖的集合,由游戲Quick, Draw! 的玩家貢獻。這些繪圖被捕獲為帶時間戳的矢量,并用元數據標記,包括要求玩家繪制的內容以及玩家所在的國家/地區。您可以在quickdraw.withgoogle.com/data上瀏覽已識別的圖紙。
因為我需要對簡筆畫之類的數據集做分類,但是我手上的數據集太小,就需要大量的數據集做個大模型,然后再微調。
數據集鏈接:wonderking/QuickDraw | 格物鈦,非結構化數據平臺Graviti提供基于SAAS模式的一站式數據管理,數據可視化,數據集供應,公開數據集獲取,數據標注,數據使用的AI數據平臺。https://gas.graviti.cn/dataset/wonderking/QuickDraw
正文
步驟:ndjson可以先轉換成json,然后json再轉換成png。
我會先把單個的代碼展示出來,最后有完整的代碼邏輯。
1、ndjson轉json
var fs = require('fs'); var ndjson = require('ndjson'); // npm install ndjsonfunction parseSimplifiedDrawings(fileName, callback) {var drawings = [];var fileStream = fs.createReadStream(fileName)fileStream.pipe(ndjson.parse()).on('data', function (obj) {drawings.push(obj)}).on("error", callback).on("end", function () {callback(null, drawings)}); }function tojson(filename) { //airplane.ndjsonvar list = filename.split(".")parseSimplifiedDrawings("D:\\my_py\\data\\QuickDrawsimplified\\" + filename, function (err, drawings) {if (err) return console.error(err);drawings.forEach(function (d) {// Do something with the drawingconsole.log(d.key_id, d.countrycode);})console.log("# of drawings:", drawings);var filename = "D:\\my_py\\data\\jsons\\"+list[0]+".json";//這里保存fs.writeFileSync(filename, JSON.stringify(drawings));//這里保存}) }要運行上面的js文件,需要先安裝nodejs,可以上網查教程。
需要調用tojson函數,傳入ndjson文件的地址,這個函數就會把ndjson轉換為json文件。別忘了改保存json的地址。
2、json轉png
f = open("D:\\my_py\\data\\jsons\\"+ list[0] + ".json") setting = json.load(f)for j in range(0, 200): # 轉化保存1000個圖for i in range(0, len(setting[j]['drawing'])):x = setting[j]['drawing'][i][0]y = setting[j]['drawing'][i][1]f = interpolate.interp1d(x, y, kind="slinear") # 線性插值pl.plot(x, y, 'k')ax = pl.gca() # 一個貓的所有線條畫一起ax.xaxis.set_ticks_position('top') # convert x,沒有ax這幾句貓就反著了ax.invert_yaxis()pl.axis('off')pl.savefig("D:\\my_py\\data\\images\\"+list[0]+"\\"+list[0]+"%d.png" % j) # 保存位置pl.close() # 不關閉的話所有圖都畫一起了f為打開的json文件,按照這個流程走,就可以轉換成為png文件了。
完整代碼
json_to_imgs.py的邏輯是:
通過讀List.txt里面的文件名,拼接成ndjson文件的路徑,先轉換成json,然后轉換成img圖片。一共有345類,每類1000張圖片。這里我只轉換了前200張。大概用了一個多小時。
imageTansform.py是將白底黑字的圖片轉換成黑底白字的圖片,看需求而定。
運行前需要先安裝一些依賴:
node.js,自己查教程安裝 pip install matplotlib pip install pillow pip install PyExecJS pip install scipy npm install ndjson
json_to_imgs.py
import json
from scipy import interpolate  # pip install scipy (kept for compatibility; no longer used below)
import pylab as pl  # pip install matplotlib
import execjs  # pip install PyExecJS
import os

# Hard-coded Windows locations used by the pipeline; edit them for your machine.
JSON_DIR = "D:\\my_py\\data\\jsons\\"
IMAGE_DIR = "D:\\my_py\\data\\images\\"
# Number of drawings rendered per class.
IMAGES_PER_CLASS = 200


def js_from_file(file_name):
    """Read a JavaScript source file and return its text.

    :param file_name: path of the .js file, read as UTF-8
    :return: the file contents as a single string
    """
    with open(file_name, 'r', encoding='UTF-8') as file:
        result = file.read()
    return result


def render_drawings(json_path, out_dir, prefix, limit=IMAGES_PER_CLASS):
    """Render the first ``limit`` drawings of a JSON file as PNG images.

    :param json_path: path of the JSON array produced by ndjson_to_json.js
    :param out_dir: directory that receives the images (created if missing)
    :param prefix: image file name prefix; files are named ``<prefix><index>.png``
    :param limit: maximum number of drawings to render
    """
    with open(json_path) as json_file:
        drawings = json.load(json_file)
    # exist_ok makes re-runs safe: the directory may already be there.
    os.makedirs(out_dir, exist_ok=True)
    # Never index past the end when a class holds fewer drawings than `limit`.
    for j in range(min(limit, len(drawings))):
        # Every stroke is a pair [xs, ys]; plotting all strokes on the same
        # figure assembles the complete drawing.
        for stroke in drawings[j]['drawing']:
            pl.plot(stroke[0], stroke[1], 'k')
        ax = pl.gca()
        # Without the axis flip below the drawing comes out upside down.
        ax.xaxis.set_ticks_position('top')
        ax.invert_yaxis()
        pl.axis('off')
        pl.savefig(out_dir + "\\" + prefix + "%d.png" % j)  # output location
        pl.close()  # without closing, every drawing would be painted onto the same figure


if __name__ == '__main__':
    # Compile the converter once; the JS source does not change between classes.
    ndjson_to_json = execjs.compile(js_from_file('ndjson_to_json.js'))
    with open("List.txt", "r") as list_file:
        # One .ndjson file name per line; drop only the trailing newline.
        filenames = [line.strip('\n') for line in list_file]
    for filename in filenames:
        if not filename:
            continue  # ignore blank lines
        name = filename.split('.')[0]
        try:
            ndjson_to_json.call("tojson", filename)
        except Exception as exc:
            # The original ignored every error here. Keep going, but report it:
            # the JSON file may still have been written, which is checked next.
            print("tojson reported an error for %s: %s" % (filename, exc))
        json_path = JSON_DIR + name + ".json"  # absolute path of the JSON file
        if not os.path.exists(json_path):
            print("skipping %s: %s was not created" % (filename, json_path))
            continue
        render_drawings(json_path, IMAGE_DIR + name, name)
ndjson_to_json.js
var fs = require('fs');
var ndjson = require('ndjson'); // npm install ndjson

/**
 * Read an .ndjson file and collect every parsed record into an array.
 *
 * @param {string} fileName - Path of the .ndjson file to read.
 * @param {function} callback - Node-style callback: called with an error, or
 *   with (null, drawings) once the whole file has been parsed.
 */
function parseSimplifiedDrawings(fileName, callback) {
  var drawings = [];
  var fileStream = fs.createReadStream(fileName);
  // pipe() does not forward errors from the source stream, so a missing or
  // unreadable file has to be reported through its own 'error' listener.
  fileStream.on('error', callback);
  fileStream
    .pipe(ndjson.parse())
    .on('data', function (obj) {
      drawings.push(obj);
    })
    .on('error', callback)
    .on('end', function () {
      callback(null, drawings);
    });
}

/**
 * Convert one .ndjson file into a single JSON array file.
 *
 * The input is read from the QuickDrawsimplified directory and the output is
 * written to the jsons directory under the name before the first dot.
 * Both directories are hard-coded below; change them for your machine.
 *
 * @param {string} filename - File name of the input, e.g. "airplane.ndjson".
 */
function tojson(filename) {
  var list = filename.split(".");
  parseSimplifiedDrawings("D:\\my_py\\data\\QuickDrawsimplified\\" + filename, function (err, drawings) {
    if (err) return console.error(err);
    drawings.forEach(function (d) {
      // Do something with the drawing
      console.log(d.key_id, d.countrycode);
    });
    // Log the count, not the whole array.
    console.log("# of drawings:", drawings.length);
    // Output location; a separate name avoids shadowing the `filename` parameter.
    var outputPath = "D:\\my_py\\data\\jsons\\" + list[0] + ".json";
    fs.writeFileSync(outputPath, JSON.stringify(drawings));
  });
}
List.txt
aircraft carrier.ndjson airplane.ndjson alarm clock.ndjson ambulance.ndjson angel.ndjson animal migration.ndjson ant.ndjson anvil.ndjson apple.ndjson arm.ndjson asparagus.ndjson axe.ndjson backpack.ndjson banana.ndjson bandage.ndjson barn.ndjson baseball bat.ndjson baseball.ndjson basket.ndjson basketball.ndjson bat.ndjson bathtub.ndjson beach.ndjson bear.ndjson beard.ndjson bed.ndjson bee.ndjson belt.ndjson bench.ndjson bicycle.ndjson binoculars.ndjson bird.ndjson birthday cake.ndjson blackberry.ndjson blueberry.ndjson book.ndjson boomerang.ndjson bottlecap.ndjson bowtie.ndjson bracelet.ndjson brain.ndjson bread.ndjson bridge.ndjson broccoli.ndjson broom.ndjson bucket.ndjson bulldozer.ndjson bus.ndjson bush.ndjson butterfly.ndjson cactus.ndjson cake.ndjson calculator.ndjson calendar.ndjson camel.ndjson camera.ndjson camouflage.ndjson campfire.ndjson candle.ndjson cannon.ndjson canoe.ndjson car.ndjson carrot.ndjson castle.ndjson cat.ndjson ceiling fan.ndjson cell phone.ndjson cello.ndjson chair.ndjson chandelier.ndjson church.ndjson circle.ndjson clarinet.ndjson clock.ndjson cloud.ndjson coffee cup.ndjson compass.ndjson computer.ndjson cookie.ndjson cooler.ndjson couch.ndjson cow.ndjson crab.ndjson crayon.ndjson crocodile.ndjson crown.ndjson cruise ship.ndjson cup.ndjson diamond.ndjson dishwasher.ndjson diving board.ndjson dog.ndjson dolphin.ndjson donut.ndjson door.ndjson dragon.ndjson dresser.ndjson drill.ndjson drums.ndjson duck.ndjson dumbbell.ndjson ear.ndjson elbow.ndjson elephant.ndjson envelope.ndjson eraser.ndjson eye.ndjson eyeglasses.ndjson face.ndjson fan.ndjson feather.ndjson fence.ndjson finger.ndjson fire hydrant.ndjson fireplace.ndjson firetruck.ndjson fish.ndjson flamingo.ndjson flashlight.ndjson flip flops.ndjson floor lamp.ndjson flower.ndjson flying saucer.ndjson foot.ndjson fork.ndjson frog.ndjson frying pan.ndjson garden hose.ndjson garden.ndjson giraffe.ndjson goatee.ndjson golf club.ndjson grapes.ndjson grass.ndjson guitar.ndjson 
hamburger.ndjson hammer.ndjson hand.ndjson harp.ndjson hat.ndjson headphones.ndjson hedgehog.ndjson helicopter.ndjson helmet.ndjson hexagon.ndjson hockey puck.ndjson hockey stick.ndjson horse.ndjson hospital.ndjson hot air balloon.ndjson hot dog.ndjson hot tub.ndjson hourglass.ndjson house plant.ndjson house.ndjson hurricane.ndjson ice cream.ndjson jacket.ndjson jail.ndjson kangaroo.ndjson key.ndjson keyboard.ndjson knee.ndjson knife.ndjson ladder.ndjson lantern.ndjson laptop.ndjson leaf.ndjson leg.ndjson light bulb.ndjson lighter.ndjson lighthouse.ndjson lightning.ndjson line.ndjson lion.ndjson lipstick.ndjson lobster.ndjson lollipop.ndjson mailbox.ndjson map.ndjson marker.ndjson matches.ndjson megaphone.ndjson mermaid.ndjson microphone.ndjson microwave.ndjson monkey.ndjson moon.ndjson mosquito.ndjson motorbike.ndjson mountain.ndjson mouse.ndjson moustache.ndjson mouth.ndjson mug.ndjson mushroom.ndjson nail.ndjson necklace.ndjson nose.ndjson ocean.ndjson octagon.ndjson octopus.ndjson onion.ndjson oven.ndjson owl.ndjson paint can.ndjson paintbrush.ndjson palm tree.ndjson panda.ndjson pants.ndjson paper clip.ndjson parachute.ndjson parrot.ndjson passport.ndjson peanut.ndjson pear.ndjson peas.ndjson pencil.ndjson penguin.ndjson piano.ndjson pickup truck.ndjson picture frame.ndjson pig.ndjson pillow.ndjson pineapple.ndjson pizza.ndjson pliers.ndjson police car.ndjson pond.ndjson pool.ndjson popsicle.ndjson postcard.ndjson potato.ndjson power outlet.ndjson purse.ndjson rabbit.ndjson raccoon.ndjson radio.ndjson rain.ndjson rainbow.ndjson rake.ndjson remote control.ndjson rhinoceros.ndjson rifle.ndjson river.ndjson roller coaster.ndjson rollerskates.ndjson sailboat.ndjson sandwich.ndjson saw.ndjson saxophone.ndjson school bus.ndjson scissors.ndjson scorpion.ndjson screwdriver.ndjson sea turtle.ndjson see saw.ndjson shark.ndjson sheep.ndjson shoe.ndjson shorts.ndjson shovel.ndjson sink.ndjson skateboard.ndjson skull.ndjson skyscraper.ndjson sleeping bag.ndjson smiley 
face.ndjson snail.ndjson snake.ndjson snorkel.ndjson snowflake.ndjson snowman.ndjson soccer ball.ndjson sock.ndjson speedboat.ndjson spider.ndjson spoon.ndjson spreadsheet.ndjson square.ndjson squiggle.ndjson squirrel.ndjson stairs.ndjson star.ndjson steak.ndjson stereo.ndjson stethoscope.ndjson stitches.ndjson stop sign.ndjson stove.ndjson strawberry.ndjson streetlight.ndjson string bean.ndjson submarine.ndjson suitcase.ndjson sun.ndjson swan.ndjson sweater.ndjson swing set.ndjson sword.ndjson syringe.ndjson t-shirt.ndjson table.ndjson teapot.ndjson teddy-bear.ndjson telephone.ndjson television.ndjson tennis racquet.ndjson tent.ndjson The Eiffel Tower.ndjson The Great Wall of China.ndjson The Mona Lisa.ndjson tiger.ndjson toaster.ndjson toe.ndjson toilet.ndjson tooth.ndjson toothbrush.ndjson toothpaste.ndjson tornado.ndjson tractor.ndjson traffic light.ndjson train.ndjson tree.ndjson triangle.ndjson trombone.ndjson truck.ndjson trumpet.ndjson umbrella.ndjson underwear.ndjson van.ndjson vase.ndjson violin.ndjson washing machine.ndjson watermelon.ndjson waterslide.ndjson whale.ndjson wheel.ndjson windmill.ndjson wine bottle.ndjson wine glass.ndjson wristwatch.ndjson yoga.ndjson zebra.ndjson zigzag.ndjson?imageTansform.py
import os
from PIL import Image  # pip install pillow


def Convert(str):
    """Invert the colours of every image of one class (white background
    with black strokes becomes black background with white strokes).

    Images are read from ``D://my_py//data//image20//<str>`` and the inverted
    copies are saved under the same file names in
    ``D:\\my_py\\data\\image20_tra\\<str>``.

    :param str: class (sub-directory) name; the parameter name shadows the
        built-in ``str`` inside this function and is kept only so existing
        callers continue to work
    """
    root = "D://my_py//data//image20//" + str
    out_dir = "D:\\my_py\\data\\image20_tra\\" + str
    # Create the output directory (and any missing parent) once per class.
    os.makedirs(out_dir, exist_ok=True)
    for filename in os.listdir(root):
        # The with-block releases the source file as soon as it is converted.
        with Image.open(root + '/' + filename) as source:
            img = source.convert("RGBA")
        pixdata = img.load()
        for y in range(img.size[1]):
            for x in range(img.size[0]):
                pixel = pixdata[x, y]
                # Invert the three colour channels; only RGB is assigned,
                # exactly as before.
                pixdata[x, y] = 255 - pixel[0], 255 - pixel[1], 255 - pixel[2]
        img.save(out_dir + "\\" + filename)


if __name__ == "__main__":
    with open("List20.txt", "r") as f:
        for line in f.readlines():
            filename = line.strip('\n')  # drop the trailing newline of each entry
            if not filename:
                continue  # ignore blank lines
            Convert(filename.split('.')[0])
圖片數據集
345類,每類200張圖片
wonderking/QuickDraw | 格物鈦,非結構化數據平臺Graviti提供基于SAAS模式的一站式數據管理,數據可視化,數據集供應,公開數據集獲取,數據標注,數據使用的AI數據平臺。https://gas.graviti.cn/dataset/wonderking/QuickDraw
參考文獻:
https://zhuanlan.zhihu.com/p/40903937
總結
以上是生活随笔為你收集整理的将QuickDraw数据集ndjson转为png图片的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 什么是YUV
- 下一篇: [SDOI2009]学校食堂Dining