自学内容网

Python 数据处理和可视化操作

import pandas as pd 
import seaborn as sns
import plotly.express as px 
import matplotlib.pyplot as plt

# master_data: load the CSV and keep only rows whose AECG_region is one of
# the four listed metropolitan regions.
regions = [
    "Metropolitan East",
    "Metropolitan North",
    "Metropolitan West",
    "Metropolitan South West",
]
df_master_data = pd.read_csv("master_data.csv")
in_selected_region = df_master_data["AECG_region"].isin(regions)
df_master_data = df_master_data.loc[in_selected_region]

print(df_master_data)

# Drop rows with a missing ICSEA_value or Town_suburb, then print the
# remaining per-column NA counts as a sanity check.
df_master_data = df_master_data.dropna(subset=["ICSEA_value", "Town_suburb"])
print(df_master_data.isna().sum())




# Q2
## import data
trend_df = pd.read_csv("enrollmentnum.csv")

columns = ["School Code", "School Name", "HC_2019", "HC_2020", "HC_2021", "HC_2022", "HC_2023"]
# Take an explicit copy so the column assignments below write to an
# independent frame instead of a slice of the raw CSV data (avoids
# SettingWithCopyWarning and keeps behaviour the same under Copy-on-Write).
trend_df = trend_df[columns].copy()
print(trend_df)

# Clean the yearly HC_* columns: strip surrounding whitespace and convert to
# numbers. Placeholder strings such as "NA" and "SP" cannot be parsed as
# numbers, so errors="coerce" turns them into NaN; no separate replace step is
# needed. (The previous chained `trend_df[col].replace(..., inplace=True)`
# acted on a temporary object and is not guaranteed to update the frame.)
hc_columns = [col for col in columns if col.startswith("HC_")]
for col in hc_columns:
    stripped = trend_df[col].astype(str).str.strip()
    trend_df[col] = pd.to_numeric(stripped, errors="coerce")

# Keep only the rows that have a numeric value for every year, then print the
# per-column NA counts to confirm nothing is missing.
trend_df = trend_df.dropna(subset=hc_columns)
print(trend_df.isna().sum())


# merge
trend_df.rename(columns=

原文地址:https://blog.csdn.net/huanghm88/article/details/143832467

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!