import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


def read_csv_file(file):
    """
    Read a CSV file into a pandas dataframe.

    The file is read with the pandas defaults, so no column is used as an
    index. The plotting functions in this module address columns by
    position and, per their inline notes, expect the order
    College, Program, Year, Major.

    Args:
        file: the name of the CSV file.

    Returns:
        df: a pandas dataframe of the CSV file, or None when the file does
            not exist (an error message is printed in that case).
    """
    try:
        return pd.read_csv(file)
    except FileNotFoundError:
        # Report the problem and hand back None so the caller can decide
        # whether to continue.
        print(f"Error: {file} not found.")
        return None


def _column_names(df, positions):
    """Return the names of the columns of df at the given integer positions."""
    return df.columns[positions].tolist()


def _sum_majors_by_year(df, category_pos):
    """
    Sum the majors column (position 3) per category and year (position 2).

    Returns a dataframe indexed by year with one column per category value.
    """
    groupby_cols_names = _column_names(df, [category_pos, 2])
    agg_col_name = df.columns[3]  # Major
    totals = df.groupby(groupby_cols_names)[agg_col_name].sum()
    # unstack: each value of the level 0 (top level) index, i.e. the
    # category, becomes a new column; the year stays as the row index.
    return totals.unstack(level=0)


def plot_major_mean(df):
    """
    Plot the mean number of majors in programs as a bar chart.

    Args:
        df: a dataframe whose column 1 is the program, column 3 is the
            number of majors, and which has a column named 'Year'.
    """
    # Look up the column names from their positions.
    groupby_cols_names = _column_names(df, [1])  # Program
    agg_cols_names = _column_names(df, [3])  # Major

    major_group = df.groupby(groupby_cols_names)[agg_cols_names].mean().round(1)

    # Prepare labels and title.
    year_min = df['Year'].min()
    year_max = df['Year'].max()
    title = f"Average # of majors from {year_min} to {year_max}"

    # Create figure and axis, then plot using the groupby result.
    _, ax = plt.subplots()
    major_group.plot(kind='bar', ax=ax)

    # Set labels and title.
    ax.set_xlabel("Program")
    ax.set_ylabel(title)
    ax.set_title(title)

    plt.show()


def plot_program_major(df):
    """
    Plot the total number of majors per year as one line per program.

    Args:
        df: a dataframe whose column 1 is the program, column 2 is the
            year and column 3 is the number of majors.
    """
    df_unstacked = _sum_majors_by_year(df, category_pos=1)  # Program

    # Create figure and axis, then plot.
    _, ax = plt.subplots()
    df_unstacked.plot(kind='line', ax=ax)

    # Set labels and title. The x axis carries the pivot's index (the
    # year); the programs appear in the legend.
    title = "# of majors"
    ax.set_xlabel(df_unstacked.index.name)
    ax.set_ylabel(title)
    ax.set_title(title)

    plt.show()


def plot_college_major(df):
    """
    Plot the total number of majors per year as grouped bars per college.

    Args:
        df: a dataframe whose column 0 is the college, column 2 is the
            year and column 3 is the number of majors.
    """
    df_unstacked = _sum_majors_by_year(df, category_pos=0)  # College

    # Create figure and axis, then draw the bars. The bars must exist
    # before they can be annotated below.
    _, ax = plt.subplots()
    df_unstacked.plot(kind='bar', ax=ax)

    # Add numbers on top of the bars.
    for p in ax.patches:
        height = float(p.get_height())
        # A college with no rows for a year yields NaN after unstack;
        # there is no bar to label in that case.
        if np.isnan(height):
            continue
        label = str(int(height)) if height.is_integer() else str(height)
        ax.annotate(label, (p.get_x(), height * 1.01))

    # Set labels and title. The x axis carries the pivot's index (the
    # year); the colleges appear in the legend.
    title = "# of majors"
    ax.set_xlabel(df_unstacked.index.name)
    ax.set_ylabel(title)
    ax.set_title(title)

    plt.show()


def main():
    """
    main program
    """
    df = read_csv_file('majors.csv')
    if df is None:
        # The file could not be read; read_csv_file already reported it.
        return

    plot_major_mean(df)
    plot_program_major(df)
    plot_college_major(df)


if __name__ == "__main__":
    main()