原标题:python处理csv文件,分列,去重,合并
原文来自:CSDN 原文链接:https://blog.csdn.net/qq_43456006/article/details/103129944
11/19/2019更新:发现用 with open(filepath) as file: 读取csv文件时,会发生编码错误,改成 with open(filepath, encoding='utf-8') as file: 就可以了。之后发现还是会报编码错误,原因是写入文件的时候也需要指定编码(在中文Windows上,open()默认使用系统区域编码GBK,而不是UTF-8)。把WriteCsv函数中的 with open(path, 'w', newline='') as rows: 改成 with open(path, 'w', newline='', encoding='utf-8') as rows: 就好了。
具体参数请阅读注释,自行修改(原谅我不会写用户界面,我是废物啊啊啊啊啊啊啊啊啊啊)
# -*- coding: utf-8 -*-
import pandas as pd
#import numpy as np
import os
import csv
"""
设置路径
"""
# Directory whose files main() processes. Keep the trailing '/': paths may be
# built from this value by plain string concatenation.
filepath = 'test/'
"""分列"""
def Devide(filepath):
    """Overwrite the ``roadloc`` column of every file in a directory.

    Each entry of ``filepath`` is read as a CSV, its ``roadloc`` column is
    split on ``','`` and replaced by the field at index 3 (the speed,
    according to the original author's comments), and the file is written
    back in place without the index column.

    Args:
        filepath: Directory containing the CSV files to rewrite. A trailing
            path separator is optional.

    Raises:
        KeyError: If a file has no ``roadloc`` column, or no ``roadloc``
            value contains at least four comma-separated fields.
    """
    for name in os.listdir(filepath):
        print(name)
        # os.path.join works whether or not ``filepath`` ends with a separator,
        # unlike plain string concatenation.
        target = os.path.join(filepath, name)
        df = pd.read_csv(target)
        # Keep only the 4th comma-separated field, replacing the raw value.
        df['roadloc'] = df['roadloc'].str.split(',', expand=True)[3]
        # The source file itself is overwritten.
        df.to_csv(target, index=False)
        print(df)
"""先分段,并得到所有不重复的路段"""
def GetNameAndSpeed(file):
    """Extract de-duplicated road names and speeds into ``namespeed.csv``.

    The ``roadloc`` column of ``file`` is split on ``','``; field 2 becomes
    the new ``roadNames`` column and field 3 the new ``roadSpeed`` column
    (road name and speed, according to the original author's comments).
    ``roadloc`` is then dropped, only the first row for each distinct
    ``roadNames`` value is kept, and the result is written to
    ``namespeed.csv`` in the current working directory.

    Args:
        file: Path of the CSV file to read.

    Raises:
        KeyError: If there is no ``roadloc`` column, or no ``roadloc`` value
            contains at least four comma-separated fields.
    """
    print(file)
    df = pd.read_csv(file)
    # Split once and reuse the result for both derived columns.
    fields = df['roadloc'].str.split(',', expand=True)
    df['roadNames'] = fields[2]
    df['roadSpeed'] = fields[3]
    df = df.drop('roadloc', axis=1)
    # drop_duplicates keeps the first occurrence of each road name.
    df = df.drop_duplicates(['roadNames'])
    df.to_csv('namespeed.csv', index=False)
    print(df)
def GetName(file):
    """Extract the de-duplicated road names of a CSV into ``roadNames.csv``.

    The ``roadloc`` column of ``file`` is split on ``','`` and field 2 is
    stored in a new ``roadNames`` column. ``roadloc`` is then dropped, only
    the first row for each distinct ``roadNames`` value is kept, and the
    result is written to ``roadNames.csv`` in the current working directory.

    Args:
        file: Path of the CSV file to read.

    Raises:
        KeyError: If there is no ``roadloc`` column, or no ``roadloc`` value
            contains at least three comma-separated fields.
    """
    print(file)
    df = pd.read_csv(file)
    fields = df['roadloc'].str.split(',', expand=True)
    df['roadNames'] = fields[2]
    df = df.drop('roadloc', axis=1)
    # drop_duplicates keeps the first occurrence of each road name.
    df = df.drop_duplicates(['roadNames'])
    df.to_csv('roadNames.csv', index=False)
    print(df)
"""合并"""
"""加载csv文件,并以二维列表的形式返回"""
def LoadCsv(filepath):
    """Read a UTF-8 encoded CSV file into a list of rows.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record, each entry being the list of
        that record's fields as strings.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are returned unchanged.
    with open(filepath, encoding='utf-8', newline='') as file:
        return list(csv.reader(file))
"""写入csv"""
def WriteCsv(lists, path):
    """Write rows to a UTF-8 encoded CSV file, replacing any existing file.

    Args:
        lists: Iterable of rows, each row an iterable of field values.
        path: Destination file path.
    """
    # newline='' stops the text layer from translating the line endings the
    # csv writer emits (which would otherwise produce blank lines on Windows).
    with open(path, 'w', newline='', encoding='utf-8') as rows:
        csv.writer(rows).writerows(lists)
"""合并函数"""
def Merge(base_path='result.csv', cur_path='namespeed.csv'):
    """Append values from one CSV onto the matching rows of another, in place.

    For every row of ``cur_path``, its second field is appended to each row
    of ``base_path`` whose first field contains the source row's first field
    as a substring. The updated rows are then written back to ``base_path``.

    Base rows without any match get nothing appended, so rows can end up with
    different lengths. Each call appends further fields to matching rows.

    Args:
        base_path: CSV file that is read, extended and overwritten.
            Defaults to ``'result.csv'``.
        cur_path: CSV file supplying the (key, value) pairs in its first two
            fields. Defaults to ``'namespeed.csv'``.
    """
    base_rows = LoadCsv(base_path)
    cur_rows = LoadCsv(cur_path)
    for cur in cur_rows:
        # Blank or one-field source rows carry no (key, value) pair.
        if len(cur) < 2:
            continue
        key, value = cur[0], cur[1]
        for base in base_rows:
            # Blank base rows have no first field to compare against.
            # NOTE(review): this is a substring test, not equality, so one
            # key can match several base rows - confirm this is intended.
            if base and key in base[0]:
                base.append(value)
    WriteCsv(base_rows, base_path)
def main():
    """Run the extract-and-merge pipeline over every file in ``filepath``.

    For each regular file in the module-level ``filepath`` directory, in
    sorted name order, this calls ``GetName``, ``GetNameAndSpeed`` and then
    ``Merge``.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the order
    # in which Merge() appends values reproducible between runs.
    for name in sorted(os.listdir(filepath)):
        path = os.path.join(filepath, name)
        # Sub-directories cannot be read as CSV files.
        if not os.path.isfile(path):
            continue
        print(name)
        # Devide(filepath) is not called here: it overwrites 'roadloc' in
        # place with a single field, after which the splits below would no
        # longer find fields 2 and 3.
        GetName(path)
        GetNameAndSpeed(path)
        Merge()
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
免责声明:本文来自互联网新闻客户端自媒体,不代表本网的观点和立场。
合作及投稿邮箱:E-mail:editor@tusaishared.com