import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def read_csv_file(file):
    """
    Read a CSV file into a pandas dataframe.

    The file is loaded with the default ``pd.read_csv`` settings: no
    ``index_col`` is passed, so every column of the file stays a regular
    column of the returned dataframe.

    Args:
        file: the name (path) of the CSV file.

    Returns:
        df: a pandas dataframe of the CSV file, or None when the file
            does not exist (an error message is printed in that case).
    """
    try:
        return pd.read_csv(file)
    except FileNotFoundError:
        #   Report the path that was requested. The message must use the
        #   `file` parameter; any other name would raise a NameError here
        #   and hide the real problem.
        print(f"Error: {file} not found.")
        return None
        
def plot_major_mean(df):
    """Draw a bar chart of the mean number of majors per program.

    The grouping column is taken from position 1 of the dataframe and the
    aggregated column from position 3. The year range shown in the title is
    read from the 'Year' column.

    Args:
        df: a pandas dataframe holding the majors data.
    """
    #   Resolve the column names from their positions.
    program_cols = df.columns[[1]].tolist()     #   Program
    major_cols = df.columns[[3]].tolist()       #   Major

    #   Mean of the aggregated column per group, rounded to one decimal.
    grouped = df.groupby(program_cols)[major_cols]
    mean_by_program = grouped.mean().round(1)

    #   Build the heading; it doubles as the y-axis label.
    first_year = df['Year'].min()
    last_year = df['Year'].max()
    heading = f"Average # of majors from {first_year} to {last_year}"

    #   Draw the bars on a fresh figure.
    fig, ax = plt.subplots()
    mean_by_program.plot(kind='bar', ax=ax)

    #   Label the chart and display it.
    ax.set_xlabel("Program")
    ax.set_ylabel(heading)
    ax.set_title(heading)
    plt.show()

def plot_program_major(df):
    """Plot the total number of majors per year, one line per program.

    The dataframe is grouped by the columns at positions 1 and 2 and the
    column at position 3 is summed. The first grouping level is then moved
    to the columns, so the remaining index (the second grouping column)
    runs along the x-axis and each value of the first grouping column gets
    its own line.

    Args:
        df: a pandas dataframe holding the majors data.
            NOTE(review): positions 1, 2, 3 are expected to be Program,
            Year and Major -- confirm against the CSV layout.
    """
    #   Get the positions of the columns for groupby and aggregate.
    groupby_cols_pos = [1, 2]    #   Program, Year
    agg_cols_pos = [3]           #   Major

    #   Get the corresponding column names of the positions.
    groupby_cols_names = df.columns[groupby_cols_pos].tolist()
    agg_cols_names = df.columns[agg_cols_pos].tolist()

    major_group = df.groupby(groupby_cols_names)[agg_cols_names].sum()

    #   unstack: each value of the level 0 (top level) index (i.e., Program)
    #   becomes a new column (columns CENG, CSCI, CIS, and so on). The
    #   remaining index is the year.
    df_unstacked = major_group[agg_cols_names[0]].unstack(level=0)

    #   Create figure and axis.
    fig, ax = plt.subplots()

    #   Plot one line per program.
    df_unstacked.plot(kind='line', ax=ax)

    #   Prepare labels and title. The x-axis carries the remaining index
    #   level (Year), not the program, which is shown in the legend.
    title = "# of majors"
    xlabel = "Year"
    ylabel = title

    #   Set labels and title.
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    plt.show()

def plot_college_major(df):
    """Plot the total number of majors per year as bars, grouped by college.

    The dataframe is grouped by the columns at positions 0 and 2 and the
    column at position 3 is summed. The first grouping level is moved to
    the columns, so the x-axis shows the second grouping column and every
    value of the first grouping column gets its own bar series. Each bar
    is annotated with its height.

    Args:
        df: a pandas dataframe holding the majors data.
            NOTE(review): positions 0, 2, 3 are expected to be College,
            Year and Major -- confirm against the CSV layout.
    """
    #   Get the positions of the columns for groupby and aggregate.
    groupby_cols_pos = [0, 2]    #   College, Year
    agg_cols_pos = [3]           #   Major

    #   Get the corresponding column names of the positions.
    groupby_cols_names = df.columns[groupby_cols_pos].tolist()
    agg_cols_names = df.columns[agg_cols_pos].tolist()

    major_group = df.groupby(groupby_cols_names)[agg_cols_names].sum()

    #   unstack: each value of the level 0 index (i.e., College) becomes a
    #   new column; the remaining index is the year.
    df_unstacked = major_group[agg_cols_names[0]].unstack(level=0)

    #   Create figure and axis.
    fig, ax = plt.subplots()

    #   Draw the bars. Without this call the axis has no patches, the
    #   annotation loop below does nothing and an empty figure is shown.
    df_unstacked.plot(kind='bar', ax=ax)

    #   Add numbers on top of the bars.
    for p in ax.patches:
        ax.annotate(str(p.get_height()), (p.get_x(), p.get_height() * 1.01))

    #   Prepare labels and title. The x-axis carries the remaining index
    #   level (Year); the colleges are shown in the legend.
    title = "# of majors"
    xlabel = "Year"
    ylabel = title

    #   Set labels and title.
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    plt.show()
   
def main():
    """Load 'majors.csv' and show the three major-count plots.

    Nothing is plotted when the CSV file could not be loaded.
    """
    df = read_csv_file('majors.csv')
    if df is None:
        #   read_csv_file has already reported the problem; passing None
        #   on would only crash inside the plotting code.
        return

    plot_major_mean(df)
    plot_program_major(df)
    plot_college_major(df)

#   Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()