【Python】CSV文件读取、筛选、删除、改变、转存

发布于:2023-02-15 ⋅ 阅读:(932) ⋅ 点赞:(0)

一、CSV文件读取

1.读行标题

#reading headers
        print(a.columns) 

2.按行读取

#reading row     注意break右边取不到
        print(a[1:2])     #第一行
        print(a[:2])      #01print(a.head(2))  #头两行 尾两行用tail
        print(a[:])       #All rows
        print(a[-1:0])    #倒数第二行

3.按列读取

#reading column
        print(a['T'])      #a['T'][0:3] T列的特定行
        print(a.T)
#reading multiple columns : change single str to a list
        print(a[['T','TI']])

4.按位置读取

# reading the data in a specific location
        print(a.iloc[2,1])

二、CSV文件筛选

1.“特定列”按数值筛选数据

#filtering data in a specific row 
        print(a['N'].loc[a['N']==3])   
        print(a.N.loc[a['NC']==3])
        print(a.loc[a['N']<=2])    
        print(a.loc[(a['N']==2) | (a['S']==2)])           #|print(a.loc[(a['N']==2) & (a['S']==2)])           #&print(a.reset_index(drop=True))                   # 去掉索引
        print(a.loc[a['T'].str.contains('')])             # 包含特定字符串
        

2.统计min,max,std,

# max,min,std,0.5,0.75,0.25
        print(a.describe())        

3.排序

# sort 排序
        print(a.sort_values('T',ascending=False))   #ascending升序

三、CSV文件删除

 # drop a specific column
        print(a.drop(columns=['TOTAL']).head(2))

四、CSV文件改变

1.行汇总

# making changes to data
        a['TOTAL']=a['T']+a['TI']
        print(a.head(2))

在这里插入图片描述

2.特定列,行汇总

# 特定列,逐行相加运算 
        a['TOTAL']=a.iloc[:,5:9].sum(axis=1)         #:means all rows所有行,逗号后面为五到八列
        print(a['TOTAL'].head(3))

3.列顺序转变

# 转换列的顺序,,提取特定几列
        columns=list(a.columns.values)
        print(a[columns[0:4]+columns[-2:-1]].head(3))

4.改列值

        a.loc[a['RE']==-9,'RE']=0
        print(a)

        a.loc[a['RE']==0,'RE']= 'Bingo'
        print(a)

        a.loc[a['R']==0,['RE','REC']]= 'Bingo'
        print(a)   

五、CSV文件转移

 # Saving to a new csv
        b=a.to_csv('new_csv',index=False)
        print(b)

六、CONTINUE

        # a['count']=1
        a.groupby(['RECO_SR']).count()['count']
        print(a)

import pandas as pd

def readcsv(path):
        for i in os.listdir(path):
        
        wl=os.path.join(path,i)
        a=pd.read_csv(wl)
        ##########d读、筛、删、改、转
        
        #reading headers
        print(a.columns) 
        
        #read a specific column
        print(a['T'][:3])
        print(a.T)
        
        
        #read multi_columns :change single to a list of name
        print(a[['T','T']])
        
        
        #read row 注意读行读区间
        print(a[1:2])
        
        #find specific data in a specific row 
        print(a['NE'].loc[a['NEE']==3])
        print(a.N.loc[a['N']==3])
        
        # read row by row
        for index, row in a.iterrows():
            print(index,row['TI'])
            

        # read the data in a specific location
        print(a.iloc[2,1])

        # max,min,std,0.5,0.75,0.25
        print(a.describe())
        
        # sort 排序
        print(a.sort_values('T',ascending=False))
        
        # ########Flitering data
        
        print(a.loc[a['NEC']<=2])    
        print(a.loc[(a['NE']==2) | (a['S']==2)])
        print(a.loc[(a['NE']==2) & (a['S']==2)])
        print(a.reset_index(drop=True))
        print(a.loc[a['T'].str.contains('')])
        
        ########## making changes to data
        a['TOTAL']=a['T']+a['TI']
        print(a.head(2))
        
        drop a specific column
        print(a.drop(columns=['TOTAL']).head(2))

        # 特定列,逐行相加运算 
        :, means all rows所有行,逗号后面为五到九列
        a['TOTAL']=a.iloc[:,5:9].sum(axis=1)
        print(a['TOTAL'].head(3))
        
        # 转换列的顺序,,提取特定几列
        columns=list(a.columns.values)
        print(a[columns[0:4]+columns[-2:-1]].head(3))
        
        ######### Saving to a new csv
        b=a.to_csv('new_csv',index=False)
        print(b)
        
        # #########condional change
        a.loc[a['R']==-9999,'R']=0
        print(a)
        
        a.loc[a['RE']==0,'RE']= 'Bingo'
        print(a)

        a.loc[a['RE']==0,['RE','RE']]= 'Bingo'
        print(a)
        
        
        # a['count']=1
        a.groupby(['RE']).count()['count']
        print(a)


readcsv(r'D:\副本 (2)')