Python 数据处理和可视化操作
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
# master_data: load the CSV and keep only rows whose AECG_region is one of
# the four metropolitan regions listed below.
regions = [
    "Metropolitan East",
    "Metropolitan North",
    "Metropolitan West",
    "Metropolitan South West",
]
df_master_data = pd.read_csv("master_data.csv").loc[
    lambda frame: frame["AECG_region"].isin(regions)
]
print(df_master_data)
# Drop rows that are missing ICSEA_value or Town_suburb, then print the
# remaining per-column NA counts.
df_master_data = df_master_data.dropna(subset=["ICSEA_value", "Town_suburb"])
print(df_master_data.isna().sum())
# Q2
## Load the enrolment CSV and keep only the school identifier columns plus
## the HC_2019 .. HC_2023 columns, in the order listed here.
columns = [
    "School Code",
    "School Name",
    "HC_2019",
    "HC_2020",
    "HC_2021",
    "HC_2022",
    "HC_2023",
]
trend_df = pd.read_csv("enrollmentnum.csv").loc[:, columns]
print(trend_df)
# Clean the yearly HC_* columns: strip surrounding whitespace, blank out the
# "NA" / "SP" placeholders, convert to numbers (anything non-numeric becomes
# NaN), then drop every row that is missing a value in any of those years.
hc_columns = ["HC_2019", "HC_2020", "HC_2021", "HC_2022", "HC_2023"]
for col in hc_columns:
    cleaned = trend_df[col].astype(str).str.strip()
    # Assign the replaced Series back instead of calling
    # trend_df[col].replace(..., inplace=True): the in-place form mutates a
    # temporary Series and is not guaranteed to update trend_df (it does
    # nothing when pandas Copy-on-Write is enabled).
    cleaned = cleaned.replace({"NA": None, "SP": None})
    trend_df[col] = pd.to_numeric(cleaned, errors="coerce")
trend_df = trend_df.dropna(subset=hc_columns)
# Print the per-column NA counts of the cleaned frame.
print(trend_df.isna().sum())
# merge
trend_df.rename(columns=
原文地址:https://blog.csdn.net/huanghm88/article/details/143832467
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!