python爬蟲-json和csv資料格式

python網路爬蟲(一、二)

資料格式

python->json格式

dumps()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import json

# One sample value for each Python type that json.dumps() is shown with.
list1 = [1, 2, 3, 4, 5]
tuple1 = (6, 7, 8, 9, 10)
dict1 = {"aaaa": 123}
str1 = "123456"
a = 1
b = 1.2
c = 1456789876567898765567876222222222222222222222222222222
T = True
F = False
d = None

# Print the JSON text of every sample, keeping the original output order.
for value in (list1, tuple1, str1, dict1, a, b, c, T, d, F):
    print(json.dumps(value))

python->json格式

python資料 JSON資料
dict object
list,tuple array
str,unicode string
int,float,long number
True true
False false
None null
1
2
3
4
5
import json

# print() of the Python object shows single-quoted strings, while the
# json.dumps() result is JSON text, which uses double quotes.
obj1 = [dict(name='ray', Age=20, size='L')]
jobj1 = json.dumps(obj1)
for shown in (obj1, jobj1):
    print(shown)

note:

  1. json字串是用雙引號

dumps()的sort_keys參數

轉成json時,鍵排序

1
2
3
4
5
6
import json

# Serialise the same object twice: once in insertion order, once with the
# keys sorted alphabetically.
obj1 = [dict(a=1, c=2, d=3, b=4)]
jobj1, jobj1_sort = (json.dumps(obj1, sort_keys=flag) for flag in (False, True))
print(jobj1)
print(jobj1_sort)

dumps()的indent參數

轉成json時,設定縮排使其容易閱讀

1
2
3
4
import json

# Pretty-print the JSON text. `indent` takes an integer (spaces per level) or
# a string; the previous `indent=True` only worked because True == 1, so the
# equivalent explicit value is used here and the output is unchanged.
obj1 = [{'a': 1, 'c': 2, 'd': 3, 'b': 4}]
jobj1_indent = json.dumps(obj1, indent=1)
print(jobj1_indent)

json->python格式

loads()

JSON資料 python資料
object dict
array list
string unicode
number(int) int,long
number(real) float
true True
false False
null None
1
2
3
4
5
import json

# Parse a JSON object string; show the resulting value and its Python type.
jobj1 = '{"a":1,"c":2,"d":3,"b":4}'
obj1 = json.loads(jobj1)
print(obj1, type(obj1), sep='\n')

每個JSON文件只能放一個JSON物件,要放多個物件,可用一個父JSON包含

1
2
3
4
5
6
7
import json

# A parent JSON object wrapping an array of child objects; drill down one
# level at a time and print each intermediate value.
jobj1 = '{"A":[{"a":1},{"b":2}]}'
obj1 = json.loads(jobj1)
children = obj1["A"]
second_child = children[1]
print(obj1)
print(children)
print(second_child)
print(second_child["b"])

將字典寫入json

1
2
3
4
5
import json

# Write a dict to a JSON file with sorted keys and one space of indentation
# per nesting level.
dict1 = {'a': 1, 'c': 2, 'd': 3, 'b': 4}
fn = "將字典寫入json檔.json"
with open(fn, 'w', encoding='utf-8') as f:
    # The body of the `with` must be indented. indent=1 is the explicit form
    # of the former indent=True and produces identical output.
    json.dump(dict1, f, sort_keys=True, indent=1)

寫入中文時,encoding='utf-8',且indent=2, ensure_ascii=False

1
2
3
4
5
6
7
8
import json

# Sample records containing non-ASCII text.
# Data typos fixed: "Tykyo" -> "Tokyo", and "美州" -> "美國" (the USA entry).
objlist = [
    {"日本": "Japan", "首都": "Tokyo"},
    {"美國": "USA", "首都": "Washington"},
]

fn = 'out1_9_2.json'
# ensure_ascii=False writes the Chinese characters as-is instead of \uXXXX
# escapes, so the file must be opened with an encoding that can hold them.
with open(fn, 'w', encoding='utf-8') as fnObj:
    json.dump(objlist, fnObj, indent=2, ensure_ascii=False)

讀取json檔案

1
2
3
4
5
6
7
8
import json

fn = 'out1_9.json'
# Decode explicitly as UTF-8 so non-ASCII content reads the same on every
# platform instead of depending on the locale's default encoding.
with open(fn, 'r', encoding='utf-8') as fnObj:
    data = json.load(fnObj)

print(data)
print(type(data))

應用

1
2
3
4
5
6
import json

# Ask for a user name and store it as a JSON string in login.json.
fn = 'login.json'
user = input("輸入使用者名稱 : ")
with open(fn, 'w', encoding='utf-8') as file:
    json.dump(user, file, ensure_ascii=False)
print("%s 歡迎使用本系統。" % user)
1
2
3
4
5
import json

# Read back the user name stored in login.json and greet the user.
fn = 'login.json'
with open(fn, 'r', encoding='utf-8') as file:
    login = json.load(file)
print("%s 歡迎回來" % login)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import json

# Log in against the account stored in the JSON file, or create the account
# when no usable file exists yet.
fn = 'login_ch1_13_2.json'
try:
    # Only the file read is guarded, so errors raised while prompting the
    # user are no longer mistaken for "no account yet".
    with open(fn, 'r', encoding='utf-8') as file:
        login = json.load(file)
except (FileNotFoundError, json.JSONDecodeError):
    # Missing or unreadable account file: register a new account. The name is
    # asked for before the file is opened, so an aborted prompt does not leave
    # an empty file behind.
    login = input("新增帳號 : ")
    with open(fn, 'w', encoding='utf-8') as file:
        json.dump(login, file, ensure_ascii=False)
    print("帳號已新增")
else:
    loginu = input("請輸入帳號 : ")
    if loginu == login:
        print("%s 歡迎回來" % login)
    else:
        print("使用者名稱錯誤")

應用(建立世界地圖)

1
2
3
4
5
import pygal.maps.world

# Render a world map with a single series and save it as an SVG file.
asia_codes = ['jp', 'cn', 'tw']
worldmap = pygal.maps.world.World()
worldmap.title = 'world map'
worldmap.add('Asia', asia_codes)
worldmap.render_to_file('out1_18_2.svg')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import json
import pygal.maps.world
from pygal.maps.world import COUNTRIES

fn = 'populations.json'

# Reverse lookup table (country name -> code), built once so each lookup no
# longer scans all of COUNTRIES. setdefault keeps the first code seen for a
# name, matching the first-match result of the previous linear scan.
_CODE_BY_NAME = {}
for _code, _name in COUNTRIES.items():
    _CODE_BY_NAME.setdefault(_name, _code)


def getcode(CountryName):
    """Return the pygal country code for *CountryName*, or None if unknown."""
    return _CODE_BY_NAME.get(CountryName)


with open(fn, 'r', encoding='utf-8') as file:
    datas = json.load(file)

# Map country code -> 2010 population for every country pygal recognises.
dict1 = {}
for data in datas:
    if data['Year'] != '2010':
        continue
    CountryName = data['Country Name']
    # int(float(...)) because the value is run through float() first in the
    # source data handling; presumably it is a decimal string - TODO confirm.
    n = int(float(data['Numbers']))
    wcode = getcode(CountryName)
    if wcode is not None:
        dict1[wcode] = n

worldmap = pygal.maps.world.World()
worldmap.title = '2010人口分布'
worldmap.add('2010', dict1)
worldmap.render_to_file('out1_21_2.svg')
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import json
import pygal.maps.world
from pygal.maps.world import COUNTRIES

fn = 'populations.json'

# Reverse lookup table (country name -> code), built once so each lookup no
# longer scans all of COUNTRIES. setdefault keeps the first code seen for a
# name, matching the first-match result of the previous linear scan.
_CODE_BY_NAME = {}
for _code, _name in COUNTRIES.items():
    _CODE_BY_NAME.setdefault(_name, _code)


def getcode(a):
    """Return the pygal country code whose name equals *a*, or None."""
    return _CODE_BY_NAME.get(a)


with open(fn, 'r', encoding='utf-8') as file:
    datas = json.load(file)

dict1 = {}  # countries with a 2010 population of at least 100,000,000
dict2 = {}  # countries below that threshold
for data in datas:
    if data['Year'] != '2010':
        continue
    name = data['Country Name']
    population = int(float(data['Numbers']))
    code = getcode(name)
    if code is None:
        continue  # name not known to pygal, so it cannot be drawn
    if population >= 100000000:
        dict1[code] = population
    else:
        dict2[code] = population

worldmap = pygal.maps.world.World()
worldmap.title = '2010'
worldmap.add('>=100000000', dict1)
worldmap.add('<100000000', dict2)
worldmap.render_to_file('out1_22_2.svg')

csv

檔案開啟(reader())

1
2
3
4
5
6
7
import csv

fn = 'csvReport.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation recommends for files passed to csv.reader.
with open(fn, 'r', newline='') as file:
    datas = csv.reader(file)  # reader object that iterates over the rows
    pdata = list(datas)       # read all rows while the file is still open
print(datas)  # prints the reader object itself, not the rows
print(pdata)

或者用for迴圈列印

1
2
3
4
5
6
import csv

# Print every row of the CSV file by iterating the reader directly.
fn = 'csvReport.csv'
with open(fn, 'r', newline='') as file:
    p = csv.reader(file)
    # The loop must stay inside the `with`: the reader pulls rows from the
    # open file lazily.
    for data in p:
        print(data)

或者用for列印串列內容

1
2
3
4
5
6
7
import csv

# Load all rows into a list first, then print them one by one.
fn = 'csvReport.csv'
with open(fn, 'r', newline='') as file:
    p = csv.reader(file)
    datas = list(p)
for data in datas:
    print(data)

使用串列索引讀取csv

1
2
3
4
5
6
7
8
import csv

# Access individual cells by [row][column] index after loading all rows.
fn = 'csvReport.csv'
with open(fn, 'r', newline='') as file:
    p = csv.reader(file)
    datas = list(p)
# Show the first two columns of rows 0, 1 and 3 (same rows as before).
for row_index in (0, 1, 3):
    print(datas[row_index][0], datas[row_index][1])

檔案開啟(DictReader())

1
2
3
4
5
6
import csv

# Read each row as a dict keyed by the column names in the header row.
fn = 'csvPeople.csv'
with open(fn, 'r', newline='') as file:
    dict1 = csv.DictReader(file)
    for data in dict1:
        print(data)

將檔案中的first name與last name印出來

1
2
3
4
5
6
7
import csv

# Print the first_name and last_name columns of every row.
fn = 'csvPeople.csv'
with open(fn, 'r', newline='') as file:
    p = csv.DictReader(file)
    print(p)  # prints the DictReader object itself
    for data in p:
        print(data['first_name'], data['last_name'])

寫入csv檔(writer)

1
2
3
4
5
6
7
8
import csv

# Write a header row and three data rows with csv.writer.
fn = 'out2_7_2.csv'
rows = [
    ['name', 'age', 'high'],
    ['a', '14', '100'],
    ['b', '18', '200'],
    ['c', '16', '150'],
]
# newline='' stops the csv module's own line endings from being translated
# again by the text layer (which would produce blank lines on Windows).
with open(fn, 'w', newline='') as file:
    writefile = csv.writer(file)
    writefile.writerows(rows)

複製csv檔

1
2
3
4
5
6
7
8
9
10
11
12
13
14
import csv

# Copy one CSV file to another, row by row.
rn = input('複製檔案名:')
wn = input('輸出檔案名:')
try:
    with open(rn, 'r', newline='') as file:
        p = csv.reader(file)
        datas = list(p)
except FileNotFoundError:
    # Catch only the "source file missing" case the message describes; a bare
    # `except:` would also hide unrelated errors (even KeyboardInterrupt).
    print("未找到複製的檔案")
else:
    with open(wn, 'w', newline='') as file:
        writefile = csv.writer(file)
        writefile.writerows(datas)

delimiter改變csv的分隔符號

1
2
3
4
5
6
7
8
import csv

# Write tab-separated values by overriding the writer's delimiter.
fn = 'out2_9_2.csv'
rows = [
    ['name', 'age', 'high'],
    ['a', '14', '100'],
    ['b', '18', '200'],
    ['c', '16', '200'],
]
with open(fn, 'w', newline='') as file:
    writefile = csv.writer(file, delimiter='\t')
    writefile.writerows(rows)

寫入csv檔(DictWriter)

1
2
3
4
5
6
7
8
9
import csv

# Write dict rows with csv.DictWriter; fieldnames fixes the column order.
fn = 'out2_10_2.csv'
list1 = ['name', 'age', 'high']
with open(fn, 'w', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=list1)
    writefile.writeheader()  # emits the fieldnames as the first row
    writefile.writerow({'name': 'a', 'age': '18', 'high': '100'})
    writefile.writerow({'name': 'b', 'age': '14', 'high': '200'})
    writefile.writerow({'name': 'c', 'age': '16', 'high': '150'})

或用for輸入串列資料

1
2
3
4
5
6
7
8
9
import csv

# Write a list of dict rows one at a time with DictWriter.writerow().
fn = 'out2_11_3.csv'
listd = [
    {'name': 'a', 'age': '18', 'high': '100'},
    {'name': 'b', 'age': '14', 'high': '200'},
    {'name': 'c', 'age': '16', 'high': '150'},
    {'name': 'd', 'age': '19', 'high': '300'},
]
listh = ['name', 'age', 'high']
with open(fn, 'w', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=listh)
    writefile.writeheader()
    for data in listd:
        writefile.writerow(data)

writerows寫法:

1
2
3
4
5
6
7
8
import csv

# Same output as the row-by-row loop, written in one writerows() call.
fn = 'out2_11_3.csv'
listd = [
    {'name': 'a', 'age': '18', 'high': '100'},
    {'name': 'b', 'age': '14', 'high': '200'},
    {'name': 'c', 'age': '16', 'high': '150'},
    {'name': 'd', 'age': '19', 'high': '300'},
]
listh = ['name', 'age', 'high']
with open(fn, 'w', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=listh)
    writefile.writeheader()
    writefile.writerows(listd)

中文寫法:

1
2
3
4
5
6
7
8
9
import csv

# Write rows whose headers and values are Chinese text; the file is opened
# as UTF-8 so the characters can be encoded.
fn = 'out2_11_4.csv'
listd = [
    {'名字': '小名', '年齡': '20', '身高': '100'},
    {'名字': '小華', '年齡': '21', '身高': '150'},
    {'名字': '小立', '年齡': '22', '身高': '200'},
]
listh = ['名字', '年齡', '身高']
with open(fn, 'w', encoding='utf-8', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=listh)
    writefile.writeheader()
    for data in listd:
        writefile.writerow(data)

中文讀取:

1
2
3
4
5
6
import csv

# Read back the UTF-8 CSV file with Chinese headers and print each row dict.
fn = 'out2_11_4.csv'
with open(fn, 'r', encoding='utf-8', newline='') as file:
    datas = csv.DictReader(file)
    for data in datas:
        print(data)

csv專案(使用csv繪製氣象圖表)

讀取檔案資料(標題):

1
2
3
4
5
6
import csv

# Read only the first (title) row of the weather CSV file.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # next() pulls a single row from the reader
print(header)

列出標題相對索引

1
2
3
4
5
6
7
8
9
import csv

# List each column title together with its index, in two output styles.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)
# First style: print the (index, title) tuples produced by enumerate().
for data in enumerate(header):
    print(data)
# Second style: unpack each tuple and print index and title separately.
for n, d in enumerate(header):
    print(n, d)

讀取最高溫和最低溫

1
2
3
4
5
6
7
8
9
10
11
12
import csv

# Collect the high and low temperature columns as lists of strings.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    lowTemps = []
    highTemps = []
    for data in datas:
        # Column 3 is used as the low and column 1 as the high temperature.
        lowTemps.append(data[3])
        highTemps.append(data[1])
print('高溫:', highTemps)
print('低溫:', lowTemps)

繪製最低溫圖

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
import csv
import matplotlib.pyplot as plt

# Plot the low-temperature column of the weather CSV as a line chart.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    lowTemps = []
    for data in datas:
        lowTemps.append(int(data[3]))  # column 3 is used as the low temperature

plt.plot(lowTemps)
plt.title('Weather Report, Jan. 2017', fontsize=24)
plt.xlabel('', fontsize=14)
plt.ylabel('Temperature(c)', fontsize=14)
plt.tick_params(axis='both', labelsize=12, color='red')
plt.show()

設定繪圖區大小

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import csv
import matplotlib.pyplot as plt

# Plot the low temperatures on a figure with an explicit size.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    lowTemps = []
    for data in datas:
        lowTemps.append(int(data[3]))

# figsize is (width, height) in inches, so at dpi=80 the figure is
# 80*12 = 960 px wide and 80*15 = 1200 px high.
plt.figure(dpi=80, figsize=(12, 15))
plt.plot(lowTemps)
plt.title("Weather Report, Jan. 2017", fontsize=24)
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

日期格式

1
2
3
4
from datetime import datetime

# Parse a date-time string according to an explicit format specification.
stamp_text = '2024/02/05 20:30:30'
stamp_format = '%Y/%m/%d %H:%M:%S'
date = datetime.strptime(stamp_text, stamp_format)
print(date)

在圖表上加入日期格式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import csv
import matplotlib.pyplot as plt
from datetime import datetime

# Plot the low temperatures against their dates on the x axis.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    date = []
    lowTemps = []
    for data in datas:
        lowTemps.append(int(data[3]))
        # Column 0 is parsed as a YYYY/MM/DD date.
        date.append(datetime.strptime(data[0], '%Y/%m/%d'))

plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, lowTemps)
plt.title('Weather Report, Jan. 2017', fontsize=24)
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

日期旋轉

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import csv
import matplotlib.pyplot as plt
from datetime import datetime

# Plot the low temperatures against dates, with slanted date tick labels.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    lowTemps = []
    date = []
    for data in datas:
        lowTemps.append(int(data[3]))
        date.append(datetime.strptime(data[0], '%Y/%m/%d'))

fig = plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, lowTemps)
fig.autofmt_xdate()  # rotates the date labels; pass rotation=n to set the angle
plt.title('Weather Report, Jan. 2017', fontsize=24)
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

繪製高低溫圖

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import csv
import matplotlib.pyplot as plt
from datetime import datetime

# Plot daily high and low temperatures, skipping rows with missing values.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    lowTemps = []
    highTemps = []
    date = []
    for data in datas:
        try:
            low = int(data[3])
            high = int(data[1])
        except (ValueError, IndexError):
            # A blank/non-numeric cell or a short row counts as a missing
            # value. A bare `except:` would also swallow unrelated errors.
            print('有缺值')
        else:
            lowTemps.append(low)
            highTemps.append(high)
            date.append(datetime.strptime(data[0], '%Y/%m/%d'))

fig = plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, highTemps, color='r')
plt.plot(date, lowTemps, color='b')
fig.autofmt_xdate(rotation=60)
plt.title('Weather Report, Jan. 2017', fontsize=24, color='b')
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

填滿中間區塊

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import csv
import matplotlib.pyplot as plt
from datetime import datetime

# Plot daily high and low temperatures and shade the band between them.
fn = 'TaipeiWeatherJan.csv'
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the title row
    lowTemps = []
    highTemps = []
    date = []
    for data in datas:
        try:
            low = int(data[3])
            high = int(data[1])
        except (ValueError, IndexError):
            # A blank/non-numeric cell or a short row counts as a missing
            # value. A bare `except:` would also swallow unrelated errors.
            print('有缺值')
        else:
            lowTemps.append(low)
            highTemps.append(high)
            date.append(datetime.strptime(data[0], '%Y/%m/%d'))

fig = plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, highTemps, color='r')
plt.plot(date, lowTemps, color='b')
# Fill the area between the two curves; alpha=0.1 sets the fill opacity.
plt.fill_between(date, highTemps, lowTemps, color='y', alpha=0.1)
fig.autofmt_xdate(rotation=60)
plt.title('Weather Report, Jan. 2017', fontsize=24, color='b')
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

pickle

寫入

1
2
3
4
5
6
7
8
9
10
import pickle

# Serialise a Python dict to a binary file with pickle.
data = {
    'name': 'a',
    'age': '20',
    'high': 200,
    'data': [1, 2, 3, 4, 5, 6, 67],
}
fn = 'ch2_23_2.dat'
# pickle produces bytes, so the file is opened in binary mode ('wb').
with open(fn, 'wb') as file:
    pickle.dump(data, file)

讀取

1
2
3
4
5
6
import pickle

# Load the pickled dict back from the binary file.
fn = 'ch2_23_2.dat'
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load pickle files that come from a trusted source.
with open(fn, 'rb') as file:
    datas = pickle.load(file)
print(datas)
print(datas['data'][2])

python與Microsoft Excel

安裝模組

1
pip install xlwt xlrd

基本功能(寫入)

建立活頁簿

1
活頁簿物件=xlwt.Workbook()

建立工作表

1
工作物件=活頁簿物件.add_sheet(sheet,cell_overwrite_ok=True)   #第二個參數為True,表示可以重設Excel的儲存格內容

將資料寫入儲存格

1
工作物件.write(row,col,data)   #將data寫入(row,col)位置

實作(寫入)

1
2
3
4
5
6
7
8
9
10
11
import xlwt

# Write product names to row 0 and prices to row 1 of a new .xls workbook.
fn = 'out2_25_2.xls'
data = ['iphone 13', 'iphone 14', 'iphone 15']
price = ['10000', '20000', '30000']
excel = xlwt.Workbook()
sh = excel.add_sheet('phone', cell_overwrite_ok=True)
# sh.write(row, col, value): enumerate supplies the column index directly.
for col, name in enumerate(data):
    sh.write(0, col, name)
for col, value in enumerate(price):
    sh.write(1, col, value)
excel.save(fn)

基本功能(讀取)

開啟excel檔案供讀取

1
活頁簿物件=xlrd.open_workbook(檔案名稱)

建立工作物件

1
工作物件=活頁簿物件.sheets()[index]   #上述傳回指定工作表的物件

傳回工作表row數

1
row=工作表物件.nrows

傳回工作表的col數

1
cols=工作表物件.ncols

讀取某rows的數據

1
list_data=工作表物件.row_values(rows)  #將指定工作表的rows的值以串列格式回傳給list_data

實作(讀取)

1
2
3
4
5
6
7
8
import xlrd

# Open the .xls workbook and print every row of its first worksheet.
fn = 'out2_25_2.xls'
excel = xlrd.open_workbook(fn)
sh = excel.sheets()[0]  # first worksheet in the workbook
n = sh.nrows            # number of rows in the sheet
for i in range(n):
    print(sh.row_values(i))  # row i as a list of cell values
⬆︎TOP