python爬蟲-json和csv資料格式

python網路爬蟲(一、二)

資料格式

python->json格式

dumps()

import json

# One sample value for each Python type that json.dumps() is shown with.
list1 = [1, 2, 3, 4, 5]
tuple1 = (6, 7, 8, 9, 10)
dict1 = {"aaaa": 123}
str1 = "123456"
a = 1
b = 1.2
c = 1456789876567898765567876222222222222222222222222222222
T = True
F = False
d = None

# Serialise each sample in turn and print the resulting JSON text.
for sample in (list1, tuple1, str1, dict1, a, b, c, T, d, F):
    print(json.dumps(sample))

python->json格式

python資料	JSON資料
dict	object
list,tuple	array
str,unicode	string
int,float,long	number
True	true
False	false
None	null

import json

# A list holding one record; printing both forms shows that Python's repr
# uses single quotes while the JSON text uses double quotes.
obj1 = [{'name': 'ray', 'Age': 20, 'size': 'L'}]
jobj1 = json.dumps(obj1)
for shown in (obj1, jobj1):
    print(shown)

note:

json字串是用雙引號

dumps()的sort_keys參數

轉成json時，鍵排序

import json

obj1 = [{'a': 1, 'c': 2, 'd': 3, 'b': 4}]
jobj1 = json.dumps(obj1)                       # keys stay in insertion order
jobj1_sort = json.dumps(obj1, sort_keys=True)  # keys are emitted in sorted order
print(jobj1, jobj1_sort, sep='\n')

dumps()的indent參數

轉成json時，設定縮排使其容易閱讀

import json

obj1 = [{'a': 1, 'c': 2, 'd': 3, 'b': 4}]
# indent takes the number of spaces per nesting level (or a string).
# Passing True only works because bool is a subclass of int and yields a
# one-space indent, so an explicit integer is used instead.
jobj1_indent = json.dumps(obj1, indent=2)
print(jobj1_indent)

json->python格式

loads()

JSON資料	python資料
object	dict
array	list
string	unicode
number(int)	int,long
number(real)	float
true	True
false	False
null	None

import json

jobj1 = '{"a":1,"c":2,"d":3,"b":4}'
obj1 = json.loads(jobj1)  # decode the JSON text into a Python object
print(obj1, type(obj1), sep='\n')

每個JSON文件只能放一個JSON物件，要放多個物件，可用一個父JSON包含

import json

# A parent JSON object wrapping an array of two child objects.
jobj1 = '{"A":[{"a":1},{"b":2}]}'
obj1 = json.loads(jobj1)

children = obj1["A"]   # the list stored under key "A"
second = children[1]   # the second child object
print(obj1)
print(children)
print(second)
print(second["b"])

將字典寫入json

import json

dict1 = {'a': 1, 'c': 2, 'd': 3, 'b': 4}
fn = "將字典寫入json檔.json"
# State the encoding explicitly so the file does not depend on the platform
# default, and pass indent as an integer (True is only accepted because bool
# is an int subclass and gives a one-space indent).
with open(fn, 'w', encoding='utf-8') as f:
    json.dump(dict1, f, sort_keys=True, indent=2)

寫入中文時，開檔需指定 encoding='utf-8'，並在 dump() 使用 ensure_ascii=False（indent=2 可讓輸出容易閱讀）

import json

# Sample records with Chinese keys. The capital of Japan is spelled "Tokyo"
# and the Chinese name paired with "USA" is "美國".
objlist = [{"日本": "Japan", "首都": "Tokyo"},
           {"美國": "USA", "首都": "Washington"}]

fn = 'out1_9_2.json'
# ensure_ascii=False writes the Chinese characters as-is instead of \uXXXX
# escapes, so the file must be opened with a Unicode encoding.
with open(fn, 'w', encoding='utf-8') as fnObj:
    json.dump(objlist, fnObj, indent=2, ensure_ascii=False)

讀取json檔案

import json

# NOTE(review): the preceding write example saves to 'out1_9_2.json'; confirm
# which file this snippet is meant to read.
fn = 'out1_9.json'
# Decode explicitly as UTF-8 so non-ASCII text is read the same way on every
# platform instead of relying on the locale's default encoding.
with open(fn, 'r', encoding='utf-8') as fnObj:
    data = json.load(fnObj)

print(data)
print(type(data))

應用

import json

fn = 'login.json'
user = input("輸入使用者名稱 : ")
# Store the entered name as a JSON string, keeping non-ASCII characters as-is.
with open(fn, mode='w', encoding='utf-8') as file:
    file.write(json.dumps(user, ensure_ascii=False))
    print("%s 歡迎使用本系統。" % user)

import json

fn = 'login.json'
# Read back the stored user name and greet the user.
with open(fn, mode='r', encoding='utf-8') as file:
    login = json.loads(file.read())
    print("%s 歡迎回來" % login)

import json

fn = 'login_ch1_13_2.json'
try:
    # Only the file read can legitimately fail here, so nothing else is
    # placed inside the try block.
    with open(fn, 'r', encoding='utf-8') as file:
        login = json.load(file)
except (FileNotFoundError, json.JSONDecodeError):
    # No usable account file yet: ask for the name first, then write it, so
    # an interrupted prompt does not leave an empty file behind.
    login = input("新增帳號 : ")
    with open(fn, 'w', encoding='utf-8') as file:
        json.dump(login, file, ensure_ascii=False)
    print("帳號已新增")
else:
    # An account exists: compare the entered name with the stored one.
    loginu = input("請輸入帳號 : ")
    if loginu == login:
        print("%s 歡迎回來" % login)
    else:
        print("使用者名稱錯誤")

應用(建立世界地圖)

import pygal.maps.world

# Country codes to highlight as the 'Asia' series.
asia_codes = ['jp', 'cn', 'tw']

worldmap = pygal.maps.world.World()
worldmap.title = 'world map'
worldmap.add('Asia', asia_codes)
worldmap.render_to_file('out1_18_2.svg')

import json
import pygal.maps.world
from pygal.maps.world import COUNTRIES

fn = 'populations.json'

# Reverse lookup table (country name -> code), built once instead of scanning
# COUNTRIES for every record. setdefault keeps the first code seen for a
# name, matching what a front-to-back scan would return.
_CODE_BY_NAME = {}
for _code, _name in COUNTRIES.items():
    _CODE_BY_NAME.setdefault(_name, _code)


def getcode(CountryName):
    """Return the COUNTRIES code whose name equals CountryName, or None."""
    return _CODE_BY_NAME.get(CountryName)


with open(fn, 'r', encoding='utf-8') as file:
    datas = json.load(file)

# Map each recognised country code to its 2010 value.
dict1 = {}
for data in datas:
    if data['Year'] != '2010':
        continue
    wcode = getcode(data['Country Name'])
    if wcode is not None:
        # 'Numbers' is converted through float first, then truncated to int.
        dict1[wcode] = int(float(data['Numbers']))

worldmap = pygal.maps.world.World()
worldmap.title = '2010人口分布'
worldmap.add('2010', dict1)
worldmap.render_to_file('out1_21_2.svg')

import json
import pygal.maps.world
from pygal.maps.world import COUNTRIES

fn = 'populations.json'

# Reverse lookup table (country name -> code), built once instead of scanning
# COUNTRIES for every record. setdefault keeps the first code seen for a
# name, matching what a front-to-back scan would return.
_CODE_BY_NAME = {}
for _code, _name in COUNTRIES.items():
    _CODE_BY_NAME.setdefault(_name, _code)


def getcode(a):
    """Return the COUNTRIES code whose name equals a, or None."""
    return _CODE_BY_NAME.get(a)


with open(fn, 'r', encoding='utf-8') as file:
    datas = json.load(file)

dict1 = {}  # codes whose 2010 value is at least 100000000
dict2 = {}  # codes whose 2010 value is below 100000000
for data in datas:
    if data['Year'] != '2010':
        continue
    c = getcode(data['Country Name'])
    if c is None:
        continue
    b = int(float(data['Numbers']))
    if b >= 100000000:
        dict1[c] = b
    else:
        dict2[c] = b

worldmap = pygal.maps.world.World()
worldmap.title = '2010'
worldmap.add('>=100000000', dict1)
worldmap.add('<100000000', dict2)
worldmap.render_to_file('out1_22_2.svg')

csv

檔案開啟(reader())

import csv

fn = 'csvReport.csv'
# The csv module expects files opened with newline='' so that it can handle
# line endings (including newlines inside quoted fields) itself.
with open(fn, 'r', newline='') as file:
    datas = csv.reader(file)  # reader object to iterate over
    pdata = list(datas)       # read every row while the file is still open
print(datas)  # prints the reader object itself
print(pdata)  # prints the list of rows

或者用for迴圈列印

import csv

fn = 'csvReport.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, 'r', newline='') as file:
    p = csv.reader(file)
    # Each row comes back as a list of strings.
    for data in p:
        print(data)

或者用for列印串列內容

import csv

fn = 'csvReport.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, 'r', newline='') as file:
    p = csv.reader(file)
    datas = list(p)  # keep all rows so they can be used after the file closes
for data in datas:
    print(data)

使用串列索引讀取csv

import csv

fn = 'csvReport.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, 'r', newline='') as file:
    p = csv.reader(file)
    datas = list(p)
# datas[row][column]: print the first two columns of rows 0, 1 and 3.
print(datas[0][0], datas[0][1])
print(datas[1][0], datas[1][1])
print(datas[3][0], datas[3][1])

檔案開啟(DictReader())

import csv

fn = 'csvPeople.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, 'r', newline='') as file:
    dict1 = csv.DictReader(file)
    # Each row is a mapping keyed by the names in the header row.
    for data in dict1:
        print(data)

將檔案中的first name與last name印出來

import csv

fn = 'csvPeople.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, 'r', newline='') as file:
    p = csv.DictReader(file)
    print(p)  # prints the DictReader object itself
    for data in p:
        # Columns are looked up by their header names.
        print(data['first_name'], data['last_name'])

寫入csv檔(writer)

import csv

fn = 'out2_7_2.csv'
# Header row followed by the data rows, in the order they are written.
rows = (
    ['name', 'age', 'high'],
    ['a', '14', '100'],
    ['b', '18', '200'],
    ['c', '16', '150'],
)
with open(fn, 'w', newline='') as file:
    writefile = csv.writer(file)
    for row in rows:
        writefile.writerow(row)

複製csv檔

import csv

rn = input('複製檔案名:')
wn = input('輸出檔案名:')
try:
    # newline='' lets the csv module handle line endings itself.
    with open(rn, 'r', newline='') as file:
        datas = list(csv.reader(file))
except FileNotFoundError:
    # Only a missing source file matches the message below; any other error
    # is left to surface instead of being reported as "file not found".
    print("未找到複製的檔案")
else:
    with open(wn, 'w', newline='') as file:
        writefile = csv.writer(file)
        writefile.writerows(datas)

delimiter改變csv的分隔符號

import csv

fn = 'out2_9_2.csv'
# Header row followed by the data rows, in the order they are written.
rows = (
    ['name', 'age', 'high'],
    ['a', '14', '100'],
    ['b', '18', '200'],
    ['c', '16', '200'],
)
with open(fn, 'w', newline='') as file:
    # delimiter='\t' separates the fields with tabs instead of commas.
    writefile = csv.writer(file, delimiter='\t')
    for row in rows:
        writefile.writerow(row)

寫入csv檔(DictWriter)

import csv

fn = 'out2_10_2.csv'
list1 = ['name', 'age', 'high']  # column order used for the header and rows
records = (
    {'name': 'a', 'age': '18', 'high': '100'},
    {'name': 'b', 'age': '14', 'high': '200'},
    {'name': 'c', 'age': '16', 'high': '150'},
)
with open(fn, 'w', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=list1)
    writefile.writeheader()
    for record in records:
        writefile.writerow(record)

或用for輸入串列資料

import csv

fn = 'out2_11_3.csv'
listh = ['name', 'age', 'high']
# Build one dict per record by pairing the header names with each value tuple.
listd = [
    dict(zip(listh, values))
    for values in (
        ('a', '18', '100'),
        ('b', '14', '200'),
        ('c', '16', '150'),
        ('d', '19', '300'),
    )
]
with open(fn, 'w', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=listh)
    writefile.writeheader()
    for data in listd:
        writefile.writerow(data)

writerows寫法:

import csv

fn = 'out2_11_3.csv'
listh = ['name', 'age', 'high']
listd = [
    {'name': 'a', 'age': '18', 'high': '100'},
    {'name': 'b', 'age': '14', 'high': '200'},
    {'name': 'c', 'age': '16', 'high': '150'},
    {'name': 'd', 'age': '19', 'high': '300'},
]
with open(fn, mode='w', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=listh)
    writefile.writeheader()
    # writerows() writes the whole list of records in one call.
    writefile.writerows(listd)

中文寫法:

import csv

fn = 'out2_11_4.csv'
listh = ['名字', '年齡', '身高']
listd = [
    {'名字': '小名', '年齡': '20', '身高': '100'},
    {'名字': '小華', '年齡': '21', '身高': '150'},
    {'名字': '小立', '年齡': '22', '身高': '200'},
]
# The file is written as UTF-8 so the Chinese header and values are stored
# the same way regardless of the platform default encoding.
with open(fn, mode='w', encoding='utf-8', newline='') as file:
    writefile = csv.DictWriter(file, fieldnames=listh)
    writefile.writeheader()
    writefile.writerows(listd)

中文讀取:

import csv

fn = 'out2_11_4.csv'
# Read as UTF-8 and with newline='' so the csv module handles line endings
# itself, as its documentation requires for file objects.
with open(fn, 'r', encoding='utf-8', newline='') as file:
    datas = csv.DictReader(file)
    for data in datas:
        print(data)

csv專案(使用csv繪製氣象圖表)

讀取檔案資料(標題):

import csv

fn = 'TaipeiWeatherJan.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # the first row of the file
    print(header)

列出標題相對索引

import csv

fn = 'TaipeiWeatherJan.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # the first row of the file
    # enumerate() yields (index, value) tuples ...
    for data in enumerate(header):
        print(data)
    # ... which can also be unpacked into two names.
    for n, d in enumerate(header):
        print(n, d)

讀取最高溫和最低溫

import csv

fn = 'TaipeiWeatherJan.csv'
lowTemps = []
highTemps = []
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    for data in datas:
        # Column 3 is collected as the low and column 1 as the high
        # temperature; the values stay as strings here.
        lowTemps.append(data[3])
        highTemps.append(data[1])
print('高溫:', highTemps)
print('低溫:', lowTemps)

繪製最低溫圖

import csv
import matplotlib.pyplot as plt

fn = 'TaipeiWeatherJan.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    # Column 3 is converted to int and collected as the low temperatures.
    lowTemps = [int(data[3]) for data in datas]

plt.plot(lowTemps)
plt.title('Weather Report, Jan. 2017', fontsize=24)
plt.xlabel('', fontsize=14)
plt.ylabel('Temperature(c)', fontsize=14)
plt.tick_params(axis='both', labelsize=12, color='red')
plt.show()

設定繪圖區大小

import csv
import matplotlib.pyplot as plt

fn = 'TaipeiWeatherJan.csv'
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    # Column 3 is converted to int and collected as the low temperatures.
    lowTemps = [int(data[3]) for data in datas]

# figsize is (width, height) in inches, so at dpi=80 the figure is
# 80*12 pixels wide and 80*15 pixels tall.
plt.figure(dpi=80, figsize=(12, 15))
plt.plot(lowTemps)
plt.title("Weather Report, Jan. 2017", fontsize=24)
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

日期格式

from datetime import datetime

# Parse a date/time string by describing its layout with format codes.
stamp_text = '2024/02/05 20:30:30'
stamp_format = '%Y/%m/%d %H:%M:%S'
date = datetime.strptime(stamp_text, stamp_format)
print(date)

在圖表上加入日期格式

import csv
import matplotlib.pyplot as plt
from datetime import datetime

fn = 'TaipeiWeatherJan.csv'
date = []
lowTemps = []
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    for data in datas:
        lowTemps.append(int(data[3]))
        # Column 0 is parsed as a YYYY/MM/DD date for the x axis.
        date.append(datetime.strptime(data[0], '%Y/%m/%d'))

plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, lowTemps)
plt.title('Weather Report, Jan. 2017', fontsize=24)
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

日期旋轉

import csv
import matplotlib.pyplot as plt
from datetime import datetime

fn = 'TaipeiWeatherJan.csv'
lowTemps = []
date = []
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    for data in datas:
        lowTemps.append(int(data[3]))
        # Column 0 is parsed as a YYYY/MM/DD date for the x axis.
        date.append(datetime.strptime(data[0], '%Y/%m/%d'))

fig = plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, lowTemps)
fig.autofmt_xdate()  # pass rotation=n to set the label angle in degrees
plt.title('Weather Report, Jan. 2017', fontsize=24)
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

繪製高低溫圖

import csv
import matplotlib.pyplot as plt
from datetime import datetime

fn = 'TaipeiWeatherJan.csv'
lowTemps = []
highTemps = []
date = []
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    for data in datas:
        try:
            a = int(data[3])
            b = int(data[1])
        except (ValueError, IndexError):
            # A blank or non-numeric cell raises ValueError and a short row
            # raises IndexError; such rows are reported and skipped. Other
            # errors are no longer hidden by a bare except.
            print('有缺值')
        else:
            lowTemps.append(a)
            highTemps.append(b)
            date.append(datetime.strptime(data[0], '%Y/%m/%d'))

fig = plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, highTemps, color='r')
plt.plot(date, lowTemps, color='b')
fig.autofmt_xdate(rotation=60)
plt.title('Weather Report, Jan. 2017', fontsize=24, color='b')
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

填滿中間區塊

import csv
import matplotlib.pyplot as plt
from datetime import datetime

fn = 'TaipeiWeatherJan.csv'
lowTemps = []
highTemps = []
date = []
# newline='' lets the csv module handle line endings itself, as its
# documentation requires for file objects.
with open(fn, newline='') as file:
    datas = csv.reader(file)
    header = next(datas)  # skip the first row before reading the values
    for data in datas:
        try:
            a = int(data[3])
            b = int(data[1])
        except (ValueError, IndexError):
            # A blank or non-numeric cell raises ValueError and a short row
            # raises IndexError; such rows are reported and skipped. Other
            # errors are no longer hidden by a bare except.
            print('有缺值')
        else:
            lowTemps.append(a)
            highTemps.append(b)
            date.append(datetime.strptime(data[0], '%Y/%m/%d'))

fig = plt.figure(dpi=80, figsize=(12, 8))
plt.plot(date, highTemps, color='r')
plt.plot(date, lowTemps, color='b')
# Shade the band between the two curves; alpha=0.1 sets its opacity.
plt.fill_between(date, highTemps, lowTemps, color='y', alpha=0.1)
fig.autofmt_xdate(rotation=60)
plt.title('Weather Report, Jan. 2017', fontsize=24, color='b')
plt.xlabel('', fontsize=12)
plt.ylabel('Temperature (C)', fontsize=12)
plt.tick_params(axis='both', labelsize=14, color='red')
plt.show()

pickle

寫入

import pickle

fn = 'ch2_23_2.dat'
# The object to serialise: a dict mixing strings, an int and a list.
data = dict(
    name='a',
    age='20',
    high=200,
    data=[1, 2, 3, 4, 5, 6, 67],
)
# Pickle data is binary, so the file is opened in 'wb' mode.
with open(fn, mode='wb') as file:
    pickle.dump(data, file)

讀取

import pickle

fn = 'ch2_23_2.dat'
# NOTE: unpickling can run arbitrary code, so only load files you trust.
with open(fn, mode='rb') as file:
    datas = pickle.load(file)
    print(datas, datas['data'][2], sep='\n')

python與Microsoft Excel

安裝模組

1	pip install xlwt

基本功能(寫入)

建立活頁簿

1	活頁簿物件=xlwt.Workbook()

建立工作表

1	工作物件=活頁簿物件.add_sheet(sheet,cell_overwrite_ok=True) #第二個參數為True，表示可以重設Excel的儲存格內容

將資料寫入儲存格

1	工作物件.write(row,col,data) #將data寫入(row,col)位置

實作(寫入)

import xlwt

fn = 'out2_25_2.xls'
data = ['iphone 13', 'iphone 14', 'iphone 15']
price = ['10000', '20000', '30000']
excel = xlwt.Workbook()
sh = excel.add_sheet('phone', cell_overwrite_ok=True)
# write(row, col, value): names go in row 0 and prices in row 1, one column
# per item. enumerate() supplies the column index directly.
for col, name in enumerate(data):
    sh.write(0, col, name)
for col, amount in enumerate(price):
    sh.write(1, col, amount)
excel.save(fn)

基本功能(讀取)

開啟excel檔案供讀取

1	活頁簿物件=xlrd.open_workbook(檔名)

建立工作物件

1	工作物件=活頁簿物件.sheets()[index] #傳回指定索引的工作表物件

傳回工作表row數

1	row=工作表物件.nrows

傳回工作表的col數

1	cols=工作表物件.ncols

讀取某rows的數據

1	list_data=工作表物件.row_values(rows) #將指定工作表第rows列的值以串列格式回傳給list_data

實作(讀取)

import xlrd

fn = 'out2_25_2.xls'
excel = xlrd.open_workbook(fn)
sh = excel.sheets()[0]  # first worksheet in the workbook
n = sh.nrows            # number of rows in that sheet
# Print each row's values as a list.
for row in map(sh.row_values, range(n)):
    print(row)

Zhi筆記部落格

Prev Home Next

python爬蟲(第一章,第二章)

python網路爬蟲(一、二)

資料格式

python->json格式

dumps()

python->json格式

dumps()的sort_keys參數

dumps()的indent參數

json->python格式

loads()

每個JSON文件只能放一個JSON物件，要放多個物件，可用一個父JSON包含

將字典寫入json

讀取json檔案

應用

應用(建立世界地圖)

csv

檔案開啟(reader())

檔案開啟(DictReader())

寫入csv檔(writer)

複製csv檔

delimiter改變csv的分隔符號

寫入csv檔(DictWriter)

csv專案(使用csv繪製氣象圖表)

pickle

python與Microsoft Excel

安裝模組

基本功能(寫入)

實作(寫入)

基本功能(讀取)

實作(讀取)