Data Analysis Using the Python Pandas Library

Step 1: Import the file

# pandas is referenced as `pd` throughout, but no import appears in the
# visible file; import it here so the snippets can run as written.
import pandas as pd

# Load the exported CSV (decoded as UTF-8) into the DataFrame that every
# later step operates on.
data = pd.read_csv('PureChatExport_Jun.csv', encoding="utf-8")

Step 2: Convert the date strings to datetime

# Parse the 'DateCreated' strings into datetime values, replacing the column
# in place. The format is month/day/4-digit year, a dash, then a 12-hour
# clock time with an AM/PM marker (e.g. "06/15/2019 - 02:30 PM").
date_format = "%m/%d/%Y - %I:%M %p"
data['DateCreated'] = pd.to_datetime(data['DateCreated'], format=date_format)

Pie Chart

# Data to plot: per-column non-null counts for each 'ChatType' group.
# The 'IsArchived' column of that result supplies the slice sizes.
# NOTE(review): `plt` is presumably matplotlib.pyplot; its import is not
# shown in this file -- confirm.
sizes = data.groupby('ChatType').count()
labels = sizes.index.values

plt.pie(
    sizes['IsArchived'],
    labels=labels,
    autopct='%1.1f%%',  # print each slice's share with one decimal place
    shadow=True,
    startangle=140,
)
plt.axis('equal')  # equal axis scaling so the pie is drawn as a circle
plt.show()

Horizontal Bar Chart

# Data to plot: per-column non-null counts for each 'FirstOperator' group.
sizes = data.groupby('FirstOperator').count()
labels = sizes.index.values
counts = sizes['IsArchived']
positions = range(len(labels))

# One horizontal bar per operator, drawn from bottom to top.
plt.barh(positions, counts, height=0.7, color='steelblue', alpha=0.8)
plt.yticks(positions, labels)

# Write each bar's value just past its right end. For a horizontal bar the
# count is the x coordinate and the bar's row number is the y coordinate.
for row, count in enumerate(counts):
    plt.text(count + 0.2, row - 0.1, str(count))
plt.show()

Multi-data Bar Chart

# Grouped bar chart: total chats vs. missed chats per week.

# Per-column non-null counts in weekly bins anchored on Monday ('W-MON').
sizes = data.groupby(pd.Grouper(key='DateCreated', freq='W-MON')).agg('count')

# Keep the datetime bin labels before they are replaced by strings below;
# they are needed to align the missed-chat series with the totals.
week_bins = sizes.index
sizes.index = sizes.index.strftime('%y%b')
labels = range(1, len(sizes.index) + 1)  # x-axis tick labels: week numbers 1..N
num_list1 = sizes['IsArchived']          # y values, series 1 (plotted as 'Total')

# NOTE(review): `missed` is not defined anywhere in the visible file; it is
# presumably the subset of `data` holding missed chats -- confirm and define
# it before this point.
sizes2 = missed.groupby(pd.Grouper(key='DateCreated', freq='W-MON')).agg('count')

# Align the missed counts to the same weekly bins as the totals. Without
# this, a `missed` frame that starts later or ends earlier than `data`
# yields a shorter series, so plt.bar either fails on a shape mismatch or
# draws bars against the wrong week. Weeks with no missed chats become 0.
num_list2 = sizes2['IsArchived'].reindex(week_bins, fill_value=0)  # y values, series 2 ('Missed')

x = range(len(num_list1))

# Draw the bar chart. plt.bar arguments:
#   first argument: x coordinate of each bar's center
#   height: bar height
#   width: bar width (default 0.8; 0.2 is used here so two bars fit per week)
#   label: used later by the legend
rects1 = plt.bar(x, num_list1, 0.2, label='Total')
rects2 = plt.bar([i + 0.25 for i in x], num_list2, 0.2, label='Missed')
plt.ylabel("数量")  # y-axis label ("count")

# Set the x-axis ticks:
#   argument 1: tick positions (shifted right to sit between the two bars)
#   argument 2: the values displayed at those positions
plt.xticks([index + 0.15 for index in x], labels)
plt.xlabel("周数")  # x-axis label ("week number")
plt.title("图5:Total Chats VS Missed Chats by Week")
plt.legend()  # show the legend built from the `label` arguments

# Annotate every bar of both series with its height, centered just above it.
for rect in [*rects1, *rects2]:
    height = rect.get_height()
    plt.text(rect.get_x() + rect.get_width() / 2, height + 1, str(height), ha="center", va="bottom")
plt.show()

Leave a comment