import os
import pandas as pd
from typing import Union
import emodpy_hiv.demographics.un_world_pop as unwp
import emodpy_hiv.plotting.xy_plot as xy_plot
import emodpy_hiv.plotting.helpers as helpers
# When True, the plotting functions in this module pass a directory/file name
# to the plot as its second title line (title_2); when False they pass None.
TEST_include_dir_or_filename = True
# Column names used to read the ReportHIVByAgeAndGender.csv data.  Every name
# except "Year" starts with a space, so the space is part of each constant.
COL_NAME_YEAR = "Year" # noqa: E221
COL_NAME_NODE_ID = " NodeId" # noqa: E221
COL_NAME_GENDER = " Gender" # noqa: E221
COL_NAME_HAS_HIV = " HasHIV" # noqa: E221
COL_NAME_RISK = " IP_Key:Risk" # noqa: E221
COL_NAME_ACCESS = " IP_Key:Accessibility" # noqa: E221
COL_NAME_STATE = " IP_Key:CascadeState" # noqa: E221
COL_NAME_IS_CIRC = " IsCircumcised" # noqa: E221
COL_NAME_AGE = " Age" # noqa: E221
COL_NAME_POP = " Population" # noqa: E221
COL_NAME_INFECTED = " Infected" # noqa: E221
COL_NAME_NEW_INF = " Newly Infected" # noqa: E221
COL_NAME_ON_ART = " On_ART" # noqa: E221
COL_NAME_DIED = " Died" # noqa: E221
COL_NAME_DIED_HIV = " Died_from_HIV" # noqa: E221
COL_NAME_TESTED_OR_ART = " Tested Past Year or On_ART" # noqa: E221
COL_NAME_TESTED_EVER = " Tested Ever" # noqa: E221
COL_NAME_DIAGNOSED = " Diagnosed" # noqa: E221
COL_NAME_NEW_TESTED_POS = " Newly Tested Positive"
COL_NAME_NEW_TESTED_NEG = " Newly Tested Negative"
[docs]
def create_title(base_title: str = "",
                 node_id: int = None,
                 gender: str = None,
                 show_avg_per_run: bool = False,
                 show_fraction: bool = False,
                 show_fraction_of: bool = False,
                 fraction_of_str: str = "",
                 hiv_negative: bool = False,
                 has_age_bins: bool = False):
    """
    Use the input arguments to create a title for the plot (and filename).

    The title is assembled in this order: averaging/fraction prefix, optional
    denominator text, who is counted (Females/Males/People), " Without HIV",
    the base title, " by Age", " - Node <id>" and, when the data is not
    averaged, a trailing " Per Run".

    Args:
        base_title (str, optional):
            This is the core string to place in the title. It describes the specific data being plotted.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.
            Any other value (including None) produces "People".

        show_avg_per_run (bool, optional):
            True indicates that the data is an average over multiple runs.

        show_fraction (bool, optional):
            True indicates that the data is not true counts but a fraction
            (i.e. a count divided by another counter)

        show_fraction_of (bool, optional):
            When the denominator of the fraction can be different things,
            say population or infected, this allows you to specify the option
            that is not population.  Only used when 'show_fraction' is True.

        fraction_of_str (str, optional):
            If 'show_fraction_of' is true, then this argument can be used to include
            in the plot what the denominator is.  None or an empty string adds nothing.

        hiv_negative (bool, optional):
            If True, then the title will indicate that the data is for people without HIV.

        has_age_bins (bool, optional):
            If True, then 'by Age' is added to the title.

    Returns:
        A string to be used as the top line title of the plot.
    """
    if show_avg_per_run and show_fraction:
        title = "Average Fraction of "
    elif show_avg_per_run:
        title = "Average Per Run "
    elif show_fraction:
        title = "Fraction of "
    else:
        title = ""

    # Callers may pass fraction_of_str=None (it is the default in
    # base_plot_by_age); treat that like "no denominator text" instead of
    # failing on str + None.
    if show_fraction and show_fraction_of and fraction_of_str:
        title = title + fraction_of_str

    if gender == "Female":
        title = title + "Females"
    elif gender == "Male":
        title = title + "Males"
    else:
        title = title + "People"

    if hiv_negative:
        title = title + " Without HIV"
    if base_title:
        title = title + " " + base_title
    if has_age_bins:
        title = title + " by Age"
    if node_id is not None:
        title = title + " - Node " + str(node_id)
    if not show_avg_per_run:
        title = title + " Per Run"
    return title
[docs]
def create_y_axis_name(base_title: str = "",
                       node_id: int = None,
                       gender: str = None,
                       show_avg_per_run: bool = False,
                       show_fraction: bool = False,
                       show_fraction_of: bool = False,
                       fraction_of_str: str = "",
                       has_age_bins: bool = False):
    """
    Given the arguments, create a label for the y-axis that describes the data being plotted.

    The label is "Number of " (or "Fraction of " plus the optional denominator
    text), followed by Females/Males/People, followed by 'base_title' appended
    as-is (no separator is inserted, so callers include a leading space).

    Args:
        base_title (str, optional):
            This is the core string to place in the y-label. It describes what is being plotted.

        node_id (int, optional):
            Not used when building the label.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.
            Any other value (including None) produces "People".

        show_avg_per_run (bool, optional):
            Not used when building the label.

        show_fraction (bool, optional):
            True indicates that the data is not true counts but a fraction
            (i.e. a count divided by another counter)

        show_fraction_of (bool, optional):
            When the denominator of the fraction can be different things,
            say population or infected, this allows you to specify the option
            that is not population.  Only used when 'show_fraction' is True.

        fraction_of_str (str, optional):
            If 'show_fraction_of' is true, then this argument can be used to include
            in the plot what the denominator is.  None or an empty string adds nothing.

        has_age_bins (bool, optional):
            Not used when building the label.

    Returns:
        A string to be used as the y-axis label of the plot.
    """
    y_axis_name = "Number of "
    if show_fraction:
        y_axis_name = "Fraction of "
        # fraction_of_str may legitimately be None (callers pass it through
        # unchanged); skip it rather than fail on str + None.
        if show_fraction_of and fraction_of_str:
            y_axis_name = y_axis_name + fraction_of_str

    if gender == "Female":
        y_axis_name = y_axis_name + "Females"
    elif gender == "Male":
        y_axis_name = y_axis_name + "Males"
    else:
        y_axis_name = y_axis_name + "People"

    return y_axis_name + base_title
[docs]
def create_df_for_plot_by_stratification(combined_df: pd.DataFrame,
                                         col_name_prefixs: list[str],
                                         age_bin_list: list[float] = None,
                                         show_fraction: bool = False):
    """
    Optionally turn the stratified counts in 'combined_df' into fractions.

    When 'show_fraction' is True, the columns are grouped by (prefix, age
    suffix) using substring matching on the column name, and every column in
    a group is divided by the row-wise sum of that group.  The data frame is
    modified in place and also returned.

    Args:
        combined_df (pd.DataFrame, required):
            The data to plot, one column per stratification value.

        col_name_prefixs (list[str], required):
            The prefixes used to group the columns.

        age_bin_list (list[float], optional):
            Bin edges; each consecutive pair adds an age suffix of the form
            ":<min> - <max>" that is used to group the columns.

        show_fraction (bool, optional):
            If False, 'combined_df' is returned unchanged.

    Returns:
        The (possibly normalized) 'combined_df'.
    """
    if not show_fraction:
        return combined_df

    # The empty suffix is a substring of every column name, so the first pass
    # of each prefix normalizes over all of that prefix's columns.
    age_suffixes = [""]
    if age_bin_list:
        for lower, upper in zip(age_bin_list[:-1], age_bin_list[1:]):
            age_suffixes.append(":" + str(lower) + " - " + str(upper))

    totals = pd.DataFrame()
    totals.index = combined_df.index

    for prefix in col_name_prefixs:
        for suffix in age_suffixes:
            total_label = "total-" + prefix + "-" + suffix
            members = [name for name in combined_df.columns
                       if (suffix in name) and (prefix in name)]

            # Row-wise sum of the group, accumulated column by column.
            totals[total_label] = 0
            for name in members:
                totals[total_label] = totals[total_label] + combined_df[name]

            for name in members:
                combined_df[name] = combined_df[name] / totals[total_label]

    return combined_df
[docs]
def plot_population_for_dir(dir_or_filename: str,
                            unworld_pop_filename: str,
                            country: str,
                            version: str,
                            x_base_population: float = 1.0,
                            show_avg_per_run: bool = False,
                            gender: str = None,
                            age_bin: float = None,
                            img_dir: str = None):
    """
    Plot the population for the given age bin against the data in the UN World Population spreadsheet.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        unworld_pop_filename (str, required):
            The name and path to a UN World Pop Excel spreadsheet where the 'country' parameter specifies
            a country name found in the spreadsheet and the 'version' specifies the year of the data.
            These values are needed to know how to read the data in the spreadsheet.
            If this is empty/None, no expected data is plotted.

        country (str, required):
            The name of the country found in the spreadsheet to extract the data for.

        version (str, required):
            The year associated with when the UN World Pop file was created.
            PLEASE NOTE: This year is a string.

        x_base_population (float, optional):
            The 'x_Base_Population' value (found in the config) is used to divide by the population
            numbers in the CSV file so you get numbers that match the true population.

        show_avg_per_run (bool, optional):
            If True, a single curve with the population averaged over all of the files
            is plotted instead of one curve per file.  Default is False.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin (float, optional):
            If provided, the data for this specific age stratification will be plotted.
            Both the data in the report file and the UN World Pop file must have this
            stratification.  If you do not provide a value, then the population is not
            broken up by age (i.e. total population).

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'x_base_population' is not greater than zero.
    """
    if x_base_population <= 0.0:
        raise ValueError("'x_base_population' must be a value greater than zero.")

    # Get the file or files in the directory
    dir_filenames = helpers.get_filenames(dir_or_filename=dir_or_filename,
                                          file_prefix="ReportHIVByAgeAndGender",
                                          file_extension=".csv")

    # ---------------------------------------------------------------
    # Extract the data from each file and combine into one dataframe
    # ---------------------------------------------------------------
    combined_df = pd.DataFrame()
    for fn in dir_filenames:
        df = extract_population_data(fn, gender=gender, age_bin=age_bin)
        if len(combined_df.columns) == 0:
            # The first file defines the index (and the accumulator column).
            combined_df.index = df.index
            if show_avg_per_run:
                combined_df[COL_NAME_POP] = 0
        if show_avg_per_run:
            combined_df[COL_NAME_POP] = combined_df[COL_NAME_POP] + df[COL_NAME_POP] / x_base_population
        else:
            combined_df[fn] = df[COL_NAME_POP] / x_base_population

    if show_avg_per_run:
        num_files = len(dir_filenames)
        for column_name in combined_df.columns:
            combined_df[column_name] = combined_df[column_name] / num_files

    # ----------------------------------------------------------------
    # Extract the expected population data from the UN World Pop file
    # and put into dataframe.  Get the data for the years that you have
    # in the CSV file.
    # ----------------------------------------------------------------
    # The index is truncated to whole years; sorting makes the request
    # deterministic.
    years = sorted(set(combined_df.index.values.astype(int).tolist()))

    date_column = COL_NAME_YEAR
    if version == "2015":
        date_column = "Reference date (as of 1 July)"

    unwp_df2 = None
    title2 = dir_or_filename
    if unworld_pop_filename:
        title2 = str(unworld_pop_filename)
        unwp_df = unwp.extract_population_by_age(country=country,
                                                 version=version,
                                                 years=years,
                                                 filename=unworld_pop_filename)
        unwp_df.index = unwp_df[date_column].astype(float)
        del unwp_df[date_column]

        # The remaining columns are converted to integers so that they can be
        # looked up with 'age_bin'.
        unwp_df = unwp_df.rename(columns={name: int(name) for name in unwp_df.columns})

        unwp_df2 = pd.DataFrame()
        unwp_df2.index = unwp_df.index.astype(float)
        if age_bin is not None:
            unwp_df2["Expected Population"] = unwp_df[age_bin]
        else:
            unwp_df2["Expected Population"] = unwp_df.sum(axis=1)

    # --------------------------
    # create title and plot data
    # --------------------------
    title = ""
    if show_avg_per_run:
        # Trailing space keeps the words apart ("Average Per Run Population").
        title = title + "Average Per Run "
    title = title + "Population"
    if gender is not None:
        title = title + " - " + gender
    if age_bin is not None:
        if age_bin == 80:
            # 80 is shown as the open-ended top bin.
            title = title + " - " + str(age_bin) + "+"
        else:
            title = title + " - " + str(age_bin) + "-" + str(age_bin + 5)

    if not TEST_include_dir_or_filename:
        title2 = None

    xy_plot.xy_plot(img_dir=img_dir,
                    df=combined_df,
                    expected_df=unwp_df2,
                    title_1=title,
                    title_2=title2,
                    y_axis_name="Number of People",
                    fraction_of_total=False,
                    show_legend=show_avg_per_run,
                    show_markers=show_avg_per_run,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_population_by_gender(filename: str,
                              img_dir: str = None):
    """
    Plot the male and the female population of one report file over time.

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv file to extract the data from.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'filename' is not a file or the file has no gender column.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    report_df = pd.read_csv(filename)
    if COL_NAME_GENDER not in report_df.columns:
        raise ValueError(f"'{COL_NAME_GENDER}' column does not exist in the file({filename}).")

    # One row per year, one column per gender code, summed over all other strata.
    population_by_gender = report_df.pivot_table(index=COL_NAME_YEAR,
                                                 columns=[COL_NAME_GENDER],
                                                 values=COL_NAME_POP,
                                                 aggfunc="sum")

    plot_df = pd.DataFrame()
    plot_df.index = population_by_gender.index
    # Gender code 0 is plotted as men and code 1 as women.
    for gender_code, label in ((0, "Number of Men"), (1, "Number of Women")):
        plot_df[label] = population_by_gender[gender_code]

    xy_plot.xy_plot(img_dir=img_dir,
                    df=plot_df,
                    title_1="Population by Gender",
                    title_2=None,
                    y_axis_name="Number of People",
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_population_by_ip(dir_or_filename: str,
                          exp_dir_or_filename: str = None,
                          node_id: int = None,
                          gender: str = None,
                          age_bin_list=None,
                          ip_key: str = None,
                          ip_values: list[str] = None,
                          show_avg_per_run: bool = False,
                          show_fraction: bool = False,
                          expected_values: dict = None,
                          img_dir: str = None):
    """
    For the indicated files, create a plot showing who has what value of the given IP key over time.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.
            Cannot be combined with 'expected_values'.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages, in years, where the population with risk value will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        ip_key (str, required):
            Extract the data from the files based on this IP stratification column.
            If the files have not been stratified by this IP, you will need to re-run the
            simulations with stratification turned on.

        ip_values (list[str], optional):
            By default, this plotting tool uses all of the values for the IP, however, this allows
            you to only include the ones you are interested in (i.e. a subset of the total possible
            values for the IP)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, this will calculate the average number of
            people with the given IP value at a given time step between the files.
            Default is False.

        show_fraction (bool, optional):
            True indicates that the number of people with the given IP value will be divided by
            the sum of all the people with all of the selected IP values.  For example, if the
            IP key were Risk and you selected to only plot LOW and MEDIUM, then the total number
            of people with LOW will be divided by the total number of people with either LOW or MEDIUM.

        expected_values (dict, optional):
            If the user provides this dictionary, the constant expected value of each IP will be
            plotted.  This should be a dictionary with an IP value as the key and a constant expected value.
            There should be an entry for each IP value plotted.  When 'ip_values' is not given,
            one expected line is drawn per dictionary key.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If both 'exp_dir_or_filename' and 'expected_values' are given,
            or if 'ip_key' is None.
    """
    if (exp_dir_or_filename is not None) and (expected_values is not None):
        raise ValueError("You cannot specify both 'exp_dir_or_filename' and 'expected_values'. "
                         + "Please only specify one of these.")

    if ip_key is None:
        raise ValueError("IP Key must be specified.")

    ip_col_name = " IP_Key:" + ip_key

    combined_df, col_name_prefixs = extract_population_data_by_stratification_for_dir(dir_or_filename=dir_or_filename,
                                                                                      node_id=node_id,
                                                                                      gender=gender,
                                                                                      age_bin_list=age_bin_list,
                                                                                      start_column_name=ip_col_name,
                                                                                      strat_values=ip_values,
                                                                                      show_avg_per_run=show_avg_per_run)
    combined_df = create_df_for_plot_by_stratification(combined_df=combined_df,
                                                       col_name_prefixs=col_name_prefixs,
                                                       age_bin_list=age_bin_list,
                                                       show_fraction=show_fraction)

    expected_df = None
    if expected_values:
        expected_df = pd.DataFrame()
        expected_df.index = combined_df.index
        # 'ip_values' is optional; without it, fall back to the keys of the
        # dictionary instead of failing on iterating None.
        expected_names = ip_values if ip_values is not None else list(expected_values)
        for value in expected_names:
            expected_df["Expected: " + value] = expected_values[value]
    elif exp_dir_or_filename is not None:
        expected_df, exp_col_name_prefixs = extract_population_data_by_stratification_for_dir(dir_or_filename=exp_dir_or_filename,
                                                                                              node_id=node_id,
                                                                                              gender=gender,
                                                                                              age_bin_list=age_bin_list,
                                                                                              start_column_name=ip_col_name,
                                                                                              strat_values=ip_values,
                                                                                              show_avg_per_run=show_avg_per_run)
        expected_df = create_df_for_plot_by_stratification(combined_df=expected_df,
                                                           col_name_prefixs=exp_col_name_prefixs,
                                                           age_bin_list=age_bin_list,
                                                           show_fraction=show_fraction)

    base_title = "with " + ip_key + "=X"
    title = create_title(base_title=base_title,
                         node_id=node_id,
                         gender=gender,
                         show_avg_per_run=show_avg_per_run,
                         show_fraction=show_fraction,
                         show_fraction_of=False,
                         fraction_of_str=None,
                         has_age_bins=(age_bin_list is not None))

    # create_y_axis_name() appends the base title without a separator.
    y_axis_name = create_y_axis_name(base_title=" " + base_title,
                                     node_id=node_id,
                                     gender=gender,
                                     show_avg_per_run=show_avg_per_run,
                                     show_fraction=show_fraction,
                                     show_fraction_of=False,
                                     fraction_of_str=None,
                                     has_age_bins=(age_bin_list is not None))

    title2 = None
    if TEST_include_dir_or_filename:
        if exp_dir_or_filename is not None:
            title2 = "color= " + dir_or_filename
            title2 = title2 + "\nblack = " + exp_dir_or_filename
        else:
            title2 = dir_or_filename

    xy_plot.xy_plot(img_dir=img_dir,
                    df=combined_df,
                    expected_df=expected_df,
                    title_1=title,
                    title_2=title2,
                    y_axis_name=y_axis_name,
                    show_legend=show_avg_per_run,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_columns(filename: str,
                 title: str,
                 y_axis_name: str,
                 column_names: list[str],
                 fraction_of_population: bool = False,
                 img_dir: str = None):
    """
    For a given file, plot the indicated columns versus time (i.e. Year).

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv file to extract the data from.

        title (str, required):
            The title to put at the top of the plot.

        y_axis_name (str, required):
            The name to label the y-axis on the plot.

        column_names (list[str], required):
            The list of column names to plot the data for.  The report has a space before each
            column name.  Please be sure to include it.  The list is not modified.

        fraction_of_population (bool, optional):
            If True, divide the count of each column by the population for the same year.
            The population column itself is then not plotted.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'filename' is not a file or a needed column is missing from it.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    df = pd.read_csv(filename)

    for name in column_names:
        if name not in df.columns:
            raise ValueError(f"'{name}' column does not exist in the file({filename}).")

    # Work on a copy so the caller's list does not grow by one population
    # entry every time this function is called.
    value_columns = list(column_names)
    if fraction_of_population:
        if COL_NAME_POP not in df.columns:
            raise ValueError(f"'{COL_NAME_POP}' column does not exist in the file({filename}).")
        if COL_NAME_POP not in value_columns:
            value_columns.append(COL_NAME_POP)

    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=[],
                        values=value_columns,
                        aggfunc="sum")

    df2 = pd.DataFrame()
    df2.index = pv.index
    for value in value_columns:
        if not fraction_of_population:
            df2[value] = pv[value]
        elif value != COL_NAME_POP:
            # The population is only the denominator; it is not plotted itself.
            df2[value] = pv[value] / pv[COL_NAME_POP]

    xy_plot.xy_plot(img_dir=img_dir,
                    df=df2,
                    title_1=title,
                    title_2=None,
                    y_axis_name=y_axis_name,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_circumcision_by_age(filename: str,
                             age_bin_list: list[float],
                             fraction_of_total: bool = False,
                             img_dir: str = None):
    """
    For a single file, plot the number of men who are circumcised by age.

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv to extract the data from.

        age_bin_list (list[float], required):
            A list of ages in years where the circumcised men will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        fraction_of_total (bool, optional):
            If True, the number of men who are circumcised will be divided by the total number
            of men with that stratification.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'filename' is not a file, or the file has no gender or age column.
    """
    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")
    df = pd.read_csv(filename)
    if COL_NAME_GENDER not in df.columns:
        raise ValueError(f"'{COL_NAME_GENDER}' column does not exist in the file({filename}).")
    if COL_NAME_AGE not in df.columns:
        raise ValueError(f"'{COL_NAME_AGE}' column does not exist in the file({filename}).")
    # NOTE(review): the IsCircumcised column is used below but is not validated
    # here like the gender and age columns are - confirm whether that is intended.
    # The distinct ages present in the file; each is assigned to a bin below.
    ages = df[COL_NAME_AGE].unique()
    y_axis_name = "Number of Circumcised Men"
    if fraction_of_total:
        y_axis_name = "Fraction of Men Circumcised by Age"
    # One row per year; the columns are keyed by (gender, age, is-circumcised).
    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=[COL_NAME_GENDER, COL_NAME_AGE, COL_NAME_IS_CIRC],
                        values=COL_NAME_POP,
                        aggfunc="sum")
    df2 = pd.DataFrame()
    df2.index = pv.index
    # Gender code 0 is used for men and circumcision code 1 for circumcised.
    gender_id = 0
    is_circumcised = 1
    for index in range(len(age_bin_list) - 1):
        label = f"Men Circumcised: {age_bin_list[index]}-{age_bin_list[index + 1]}"
        # Both accumulators live as extra columns on 'pv'; "total" is reset for
        # every bin, so it must be consumed before the next iteration.
        pv[label] = 0
        pv["total"] = 0
        for age in ages:
            # Bins are half-open: [min, max)
            if (age_bin_list[index] <= age) and (age < age_bin_list[index + 1]):
                # NOTE(review): assumes both the 0 and the 1 circumcision column
                # exist in 'pv' for every age in the bin - confirm.
                pv["total"] = pv["total"] + pv[(gender_id, age, 0)] + pv[(gender_id, age, 1)]
                pv[label] = pv[label] + pv[(gender_id, age, is_circumcised)]
        df2[label] = pv[label]
        if fraction_of_total:
            df2[label] = df2[label] / pv["total"]
    xy_plot.xy_plot(img_dir=img_dir,
                    df=df2,
                    title_1="Circumcised Men by Age",
                    title_2=None,
                    y_axis_name=y_axis_name,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def create_df_for_plot_by_age(combined_df: pd.DataFrame,
                              col_name_prefixs: list[str],
                              main_column_name: str,
                              gender: str,
                              age_bins: list[float],
                              show_fraction: bool,
                              fraction_of: bool,
                              fraction_of_column_name: str):
    """
    Build the data frame to plot from the extracted (and combined) report data.

    When 'show_fraction' is False, the result holds the columns of 'combined_df'
    whose name contains 'main_column_name', with 'gender' prepended to each
    column name when a gender is given.  When 'show_fraction' is True, the
    result holds, for every prefix (and every age bin), the main column divided
    by either the population column or the 'fraction_of_column_name' column;
    undefined ratios (NaN) are replaced with 0.  'combined_df' is not modified.

    Args:
        combined_df (pd.DataFrame, required):
            The extracted data.  Columns are named "<prefix><column name>" with an
            optional ":<min> - <max>" age suffix.

        col_name_prefixs (list[str], required):
            The column name prefixes to create fractions for.

        main_column_name (str, required):
            The name of the column being plotted (numerator of the fraction).

        gender (str, required):
            If not empty/None, it is prepended to the column names of the counts.

        age_bins (list[float], required):
            The age bin edges, or None/empty if the data has no age suffixes.

        show_fraction (bool, required):
            True to create fractions instead of counts.

        fraction_of (bool, required):
            True to divide by 'fraction_of_column_name' instead of the population.

        fraction_of_column_name (str, required):
            The denominator column when 'fraction_of' is True.  Its first character
            (the leading space) is dropped when building the label.  It is not used,
            and may be None, when 'fraction_of' is False.

    Returns:
        A new data frame with the columns to plot.
    """
    if not show_fraction:
        # Select instead of deleting so the caller's frame stays intact.
        kept = [name for name in combined_df.columns if main_column_name in name]
        df2 = combined_df[kept].copy()
        if gender:
            df2 = df2.rename(columns={name: gender + name for name in kept})
        return df2

    df2 = pd.DataFrame()
    df2.index = combined_df.index

    # Only touch 'fraction_of_column_name' when it is really the denominator;
    # it can be None when dividing by the population.
    if fraction_of:
        denominator_name = fraction_of_column_name
        label_suffix = "/" + fraction_of_column_name[1:]
    else:
        denominator_name = COL_NAME_POP
        label_suffix = "/Population"

    if age_bins:
        age_suffixes = [":" + str(age_bins[i]) + " - " + str(age_bins[i + 1])
                        for i in range(len(age_bins) - 1)]
    else:
        age_suffixes = [""]

    for prefix in col_name_prefixs:
        fraction_label = prefix + main_column_name + label_suffix
        for age_suffix in age_suffixes:
            numerator = combined_df[prefix + main_column_name + age_suffix]
            denominator = combined_df[prefix + denominator_name + age_suffix]
            # 0/0 gives NaN, which is plotted as 0.
            df2[fraction_label + age_suffix] = (numerator / denominator).fillna(0)

    return df2
[docs]
def base_plot_by_age(base_title: str,
                     main_column_name: str,
                     dir_or_filename: str,
                     exp_dir_or_filename: str = None,
                     node_id: int = None,
                     age_bins: list[float] = None,
                     gender: str = None,
                     filter_by_hiv_negative: bool = False,
                     other_strat_column_name: str = None,
                     other_strat_value_a: Union[int, float, str] = None,
                     other_strat_value_b: Union[int, float, str] = None,
                     show_avg_per_run: bool = False,
                     show_fraction: bool = False,
                     fraction_of: bool = False,
                     fraction_of_column_name: str = None,
                     fraction_of_str: str = None,
                     img_dir: str = None):
    """
    Shared implementation of the "by age" plots: extract one data column from the
    report files, optionally convert it to a fraction, and plot it over time.

    Args:
        base_title (str, required):
            The core string describing the data; used in the title and the y-axis label.

        main_column_name (str, required):
            The name of the report column to plot.

        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        age_bins (list[float], optional):
            A list of ages in years defining the bins the data is counted for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        filter_by_hiv_negative, other_strat_column_name, other_strat_value_a, other_strat_value_b:
            Passed through unchanged to extract_population_data_multiple_ages_for_dir().

        show_avg_per_run (bool, optional):
            True to plot the average over the files instead of one curve per file.

        show_fraction (bool, optional):
            True to plot the main column divided by a denominator instead of the counts.

        fraction_of (bool, optional):
            If 'show_fraction' is True, True means the denominator is 'fraction_of_column_name'
            instead of the population.

        fraction_of_column_name (str, optional):
            The name of the denominator column when 'fraction_of' is True.

        fraction_of_str (str, optional):
            Text describing the denominator; used in the title and the y-axis label.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If the HIV flag returned for the two sets of files differs.
    """
    # Columns that must be extracted in addition to the population.
    other_data_column_names = []
    if main_column_name != COL_NAME_POP:
        other_data_column_names.append(main_column_name)
    if show_fraction and fraction_of:
        other_data_column_names.append(fraction_of_column_name)

    def _load_plot_data(source: str):
        # Extract the data for one directory/file and shape it for plotting.
        combined_df, col_name_prefixs, source_hiv_neg = extract_population_data_multiple_ages_for_dir(dir_or_filename=source,
                                                                                                      node_id=node_id,
                                                                                                      gender=gender,
                                                                                                      age_bin_list=age_bins,
                                                                                                      show_avg_per_run=show_avg_per_run,
                                                                                                      filter_by_hiv_negative=filter_by_hiv_negative,
                                                                                                      other_strat_column_name=other_strat_column_name,
                                                                                                      other_strat_value_a=other_strat_value_a,
                                                                                                      other_strat_value_b=other_strat_value_b,
                                                                                                      other_data_column_names=other_data_column_names)
        plot_df = create_df_for_plot_by_age(combined_df=combined_df,
                                            col_name_prefixs=col_name_prefixs,
                                            main_column_name=main_column_name,
                                            gender=gender,
                                            age_bins=age_bins,
                                            show_fraction=show_fraction,
                                            fraction_of=fraction_of,
                                            fraction_of_column_name=fraction_of_column_name)
        return plot_df, source_hiv_neg

    df2, hiv_neg = _load_plot_data(dir_or_filename)

    exp_df2 = None
    if exp_dir_or_filename:
        exp_df2, exp_hiv_neg = _load_plot_data(exp_dir_or_filename)
        if exp_hiv_neg is not None and hiv_neg != exp_hiv_neg:
            # f-strings so that the names and values really appear in the message.
            raise ValueError("The two sets of data do not each have the HasHIV column.\n"
                             + f"The files in {dir_or_filename} have HasHIV={hiv_neg}.\n"
                             + f"The files in {exp_dir_or_filename} have HasHIV={exp_hiv_neg}.\n"
                             + "Please check the files and make sure they are correct.")

    title = create_title(base_title=base_title,
                         node_id=node_id,
                         gender=gender,
                         show_avg_per_run=show_avg_per_run,
                         show_fraction=show_fraction,
                         show_fraction_of=fraction_of,
                         fraction_of_str=fraction_of_str,
                         hiv_negative=hiv_neg,
                         has_age_bins=(age_bins is not None))

    # create_y_axis_name() appends the base title without a separator.
    y_axis_name = create_y_axis_name(base_title=" " + base_title,
                                     node_id=node_id,
                                     gender=gender,
                                     show_avg_per_run=show_avg_per_run,
                                     show_fraction=show_fraction,
                                     show_fraction_of=fraction_of,
                                     fraction_of_str=fraction_of_str,
                                     has_age_bins=(age_bins is not None))

    title2 = None
    if TEST_include_dir_or_filename:
        if exp_df2 is not None:
            title2 = "color=" + dir_or_filename
            title2 = title2 + "\nblack=" + exp_dir_or_filename
        else:
            title2 = dir_or_filename

    xy_plot.xy_plot(img_dir=img_dir,
                    df=df2,
                    expected_df=exp_df2,
                    title_1=title,
                    title_2=title2,
                    y_axis_name=y_axis_name,
                    show_legend=show_avg_per_run,
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_onART_by_age(dir_or_filename: str,
                      exp_dir_or_filename: str = None,
                      node_id: int = None,
                      gender: str = None,
                      age_bin_list: list[float] = None,
                      show_avg_per_run: bool = False,
                      show_fraction: bool = False,
                      fraction_of_infected: bool = False,
                      img_dir: str = None):
    """
    Plot the people on ART over time, as counts or as a fraction of either the
    population or the infected population.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years where the people on ART will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, plot the average over the files
            instead of one curve per file.  Default is False.

        show_fraction (bool, optional):
            True indicates that the number of people on ART will be divided by either the
            number of people in the population or the number of infected people, depending
            on 'fraction_of_infected'.

        fraction_of_infected (bool, optional):
            Only relevant when 'show_fraction' is True.  True divides by the number of
            infected people; False divides by the total population with that stratification.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.
    """
    # What to plot ...
    base_plot_by_age(base_title="On ART",
                     main_column_name=COL_NAME_ON_ART,
                     # ... and what to divide it by when a fraction is requested.
                     show_fraction=show_fraction,
                     fraction_of=fraction_of_infected,
                     fraction_of_column_name=COL_NAME_INFECTED,
                     fraction_of_str="Infected ",
                     # Where the data comes from and how it is filtered.
                     dir_or_filename=dir_or_filename,
                     exp_dir_or_filename=exp_dir_or_filename,
                     node_id=node_id,
                     gender=gender,
                     age_bins=age_bin_list,
                     show_avg_per_run=show_avg_per_run,
                     img_dir=img_dir)
[docs]
def plot_population_by_age(dir_or_filename: str,
                           exp_dir_or_filename: str = None,
                           node_id: int = None,
                           gender: str = None,
                           age_bin_list: list[float] = None,
                           show_avg_per_run: bool = False,
                           img_dir: str = None):
    """
    Plot the population over time, optionally split into age bins.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years where the population will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55).
            Use None (not an empty list) to plot without age bins.

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, plot the average over the files
            instead of one curve per file.  Default is False.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'age_bin_list' is an empty list.
    """
    bins_given = age_bin_list is not None
    if bins_given and len(age_bin_list) == 0:
        message = ("The 'age_bin_list' parameter must be a list of ages in years where the population will be counted for each bin. "
                   "If you do not want to use age bins, please set it to None.")
        raise ValueError(message)

    # Plain counts of the population column: no fraction of any kind.
    base_plot_by_age(base_title="",
                     main_column_name=COL_NAME_POP,
                     show_fraction=False,
                     fraction_of=False,
                     fraction_of_column_name=None,
                     fraction_of_str=None,
                     dir_or_filename=dir_or_filename,
                     exp_dir_or_filename=exp_dir_or_filename,
                     node_id=node_id,
                     gender=gender,
                     age_bins=age_bin_list,
                     show_avg_per_run=show_avg_per_run,
                     img_dir=img_dir)
[docs]
def plot_vmmc_by_age(dir_or_filename: str,
                     exp_dir_or_filename: str = None,
                     node_id: int = None,
                     age_bin_list: list[float] = None,
                     show_avg_per_run: bool = False,
                     img_dir: str = None):
    """
    Plot the circumcised men over time, optionally split into age bins.  The data
    is always filtered for males.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years where the men will be counted
            for each bin.  For example, if you enter [10, 25, 30, 55], there will be three
            age bins with the following ranges: [10-25), [25-30), [30-55)

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, plot the average over the files
            instead of one curve per file.  Default is False.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a window will be opened.

    Returns:
        None - but image will be saved or window opened.
    """
    base_plot_by_age(base_title="VMMC",
                     main_column_name=COL_NAME_POP,
                     # Fixed filters of this plot.
                     gender="Male",
                     filter_by_hiv_negative=True,
                     other_strat_column_name=COL_NAME_IS_CIRC,
                     other_strat_value_a=1,  # Only show circumcised
                     other_strat_value_b=0,  # Not circumcised
                     # Plain counts: no fraction of any kind.
                     show_fraction=False,
                     fraction_of=False,
                     fraction_of_column_name="",
                     fraction_of_str=None,
                     # Caller-selected options.
                     dir_or_filename=dir_or_filename,
                     exp_dir_or_filename=exp_dir_or_filename,
                     node_id=node_id,
                     age_bins=age_bin_list,
                     show_avg_per_run=show_avg_per_run,
                     img_dir=img_dir)
[docs]
def plot_population_by_age_vs_unworld_pop(filename: str,
                                          unworld_pop_filename: str,
                                          age_bin_list: list[float],
                                          country: str,
                                          version: str,
                                          x_base_population: float = 1.0,
                                          img_dir=None):
    """
    For a single file, plot the actual population for a given age bin versus what was expected
    in the given UN World Population file.  If you define three age bins, there will be
    six curves - an actual and an expected for each bin.

    Args:
        filename (str, required):
            The name and path of the ReportHIVByAgeAndGender.csv file to plot the data from.

        unworld_pop_filename (str, required):
            The name and path to a UN World Pop Excel spreadsheet where the 'country' parameter
            specifies a country name found in the spreadsheet and the 'version' specifies the
            year of the data.  These values are needed to know how to read the data in the
            spreadsheet.

        age_bin_list (list[float], required):
            A list of at least two ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).

        country (str, required):
            The name of the country found in the spreadsheet to extract the data for.

        version (str, required):
            The year associated with when the UN World Pop file was created.
            PLEASE NOTE: This year is a string.

        x_base_population (float, optional):
            The 'x_Base_Population' value (found in the config).  The population numbers
            in the CSV file are divided by this value.  Default is 1.0.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a
            window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'age_bin_list' does not define at least one bin, if 'filename'
            is not a file, or if the file is missing one of the required columns.
    """
    # Fewer than two ages define no bin at all, which would silently give an empty plot.
    if age_bin_list is None or len(age_bin_list) < 2:
        raise ValueError("'age_bin_list' must contain at least two ages to define an age bin.")

    if not os.path.isfile(filename):
        raise ValueError(f"The filename, '{filename}' given does not appear to be a file.")

    df = pd.read_csv(filename)

    # The Year column is the pivot index below, so it is validated with the others.
    for required_column in (COL_NAME_GENDER, COL_NAME_AGE, COL_NAME_POP, COL_NAME_YEAR):
        if required_column not in df.columns:
            raise ValueError(f"'{required_column}' column does not exist in the file({filename}).")

    ages = df[COL_NAME_AGE].unique()

    # One row per year, one column per reported age, summed over all other columns.
    pv = df.pivot_table(index=COL_NAME_YEAR,
                        columns=[COL_NAME_AGE],
                        values=COL_NAME_POP,
                        aggfunc="sum")

    # Consecutive pairs of ages are the [low, high) edges of each bin.
    bin_edges = list(zip(age_bin_list, age_bin_list[1:]))

    df2 = pd.DataFrame(index=pv.index)
    for low, high in bin_edges:
        # Accumulate in a separate Series so the pivot table itself is not modified.
        bin_total = pd.Series(0, index=pv.index)
        for age in ages:
            if low <= age < high:
                bin_total = bin_total + pv[age]
        df2[f"Population: {low}-{high}"] = bin_total / x_base_population

    # Unique whole years in ascending order so the request is deterministic.
    years = sorted(set(df2.index.values.astype(int).tolist()))

    unwp_df = unwp.extract_population_by_age(country=country,
                                             version=version,
                                             years=years,
                                             filename=unworld_pop_filename)
    unwp_df.index = unwp_df[COL_NAME_YEAR]

    unwp_df2 = pd.DataFrame(index=unwp_df.index)
    for low, high in bin_edges:
        # NOTE(review): the extracted UN data is looked up by str(lower bin edge);
        # a bin edge without a matching column raises KeyError - confirm against
        # extract_population_by_age().
        unwp_df2[f"Expected Population: {low}-{high}"] = unwp_df[str(low)]
    unwp_df2.index = unwp_df2.index.astype(float)

    xy_plot.xy_plot(img_dir=img_dir,
                    df=df2,
                    expected_df=unwp_df2,
                    title_1="Population by Age",
                    title_2=f"UN World Population - {country} - {version}",
                    y_axis_name="Number of People",
                    fraction_of_total=False,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_risk(dir_or_filename: str,
              starting_expected_values: dict = None,
              expected_value_for_high_per_node: list[float] = None,
              gender: str = None,
              age_bin_list: list[float] = None,
              show_avg_per_run: bool = False,
              show_fraction: bool = False,
              img_dir: str = None):
    """
    Plot how the population is distributed over the Risk values LOW, MEDIUM and HIGH.

    When expected values are supplied, one plot is created for each entry in
    'expected_value_for_high_per_node' (node IDs are 1, 2, 3, ... in list order)
    and the expected LOW/MEDIUM/HIGH values for that node are passed along.
    When no expected values are supplied, a single plot is created without
    filtering on a node.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        starting_expected_values (dict, optional):
            The starting values of how risk is distributed to the population, keyed by
            "LOW", "MEDIUM" and "HIGH".  These should be the same values that you have
            in the demographics.  "HIGH" must be 0.0.  Must be given together with
            'expected_value_for_high_per_node'.

        expected_value_for_high_per_node (list[float], optional):
            A list of expected fraction of the population to have Risk = HIGH for a
            given node.  The node ID of each value is its position in the list plus 1.
            The starting values for LOW and MEDIUM are scaled by (1 - HIGH).

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the values at each time step
            between the files.  Default is False.

        show_fraction (bool, optional):
            True indicates that for each stratification the number of people with a
            given risk value is divided by the total number of people in that
            stratification.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a
            window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If only one of the two expected-value arguments is given, or if
            the starting value for "HIGH" is not zero.
    """
    has_starting = starting_expected_values is not None
    has_high_per_node = expected_value_for_high_per_node is not None

    # The two expected-value arguments only make sense as a pair.
    if has_starting != has_high_per_node:
        raise ValueError("'starting_expected_values' and 'expected_value_for_high_per_node'"
                         " need to be either both None or both defined.")

    if has_starting and (starting_expected_values["HIGH"] != 0.0):
        raise ValueError("Expected 'starting_expected_values['HIGH'] to be zero.")

    # Arguments that are identical for every plot created below.
    shared_args = {
        "dir_or_filename": dir_or_filename,
        "gender": gender,
        "age_bin_list": age_bin_list,
        "ip_key": "Risk",
        "show_avg_per_run": show_avg_per_run,
        "show_fraction": show_fraction,
        "img_dir": img_dir,
    }

    if not has_starting:
        # No expected values: a single plot that is not restricted to a node.
        plot_population_by_ip(node_id=None,
                              ip_values=["LOW", "MEDIUM", "HIGH"],
                              expected_values=None,
                              **shared_args)
        return

    # One plot per node; the list position (starting at 1) is the node ID.
    for node_id, high_value in enumerate(expected_value_for_high_per_node, start=1):
        expected_values = {
            "LOW": starting_expected_values["LOW"] * (1.0 - high_value),
            "MEDIUM": starting_expected_values["MEDIUM"] * (1.0 - high_value),
            "HIGH": high_value,
        }
        plot_population_by_ip(node_id=node_id,
                              ip_values=["LOW", "MEDIUM", "HIGH"],
                              expected_values=expected_values,
                              **shared_args)
[docs]
def plot_vmmc_for_dir(dir_or_filename: str,
                      node_id: int = None,
                      age_bin_list: list[float] = None,
                      show_expected: bool = False,
                      show_avg_per_run: bool = False,
                      img_dir: str = None):
    """
    Create a plot showing what fraction of men are circumcised over time versus the
    expected fraction of circumcised men.

    For every report file the fraction is circumcised / (circumcised + uncircumcised)
    per column returned by extract_population_data_multiple_ages().  Without
    averaging, each file contributes its own curves named '<file>-<column>'; with
    averaging, the per-file fractions are summed and divided by the number of files.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.  Required,
            and must be one of the nodes with a known expected coverage (1-10),
            when 'show_expected' is True.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).

        show_expected (bool, optional):
            If true, plot the expected fraction of circumcisions.

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the fraction at each time
            step between the files.  Default is False.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a
            window will be opened.

    Returns:
        None - but image will be saved or window opened.

    Raises:
        ValueError: If 'show_expected' is True and 'node_id' is None or is not a
            node ID with an expected coverage value.
    """
    # Expected traditional coverage; the entry at position i belongs to node ID i + 1.
    traditional_coverage_per_node = [0.054978651, 0.139462861, 0.028676043, 0.091349358, 0.123187070,
                                     0.039308099, 0.727917322, 0.041105263, 0.044388102, 0.398239794]

    # "Expected Plus Medical" curve: 0 before rtec_x1, a straight line from
    # (rtec_x1, rtec_y1) to (rtec_x2, rtec_y2), and rtec_y2 afterwards.
    rtec_x1 = 2016
    rtec_x2 = 2021
    rtec_y1 = 0.54
    rtec_y2 = 0.9

    # Validate before reading any file.  A node_id below 1 would otherwise index the
    # coverage list from the end and silently show another node's expected value.
    if show_expected:
        if node_id is None:
            raise ValueError("You need to specify 'node_id' if you want to compare actual data against the expected values.")
        if not 1 <= node_id <= len(traditional_coverage_per_node):
            raise ValueError(f"'node_id' must be between 1 and {len(traditional_coverage_per_node)}"
                             f" to show the expected values, but got {node_id}.")

    dir_filenames = helpers.get_filenames(dir_or_filename=dir_or_filename,
                                          file_prefix="ReportHIVByAgeAndGender",
                                          file_extension=".csv")

    combined_df = pd.DataFrame()
    for fn in dir_filenames:
        df_circ, _ = extract_population_data_multiple_ages(fn,
                                                           node_id=node_id,
                                                           gender="Male",
                                                           age_bin_list=age_bin_list,
                                                           other_strat_column_name=COL_NAME_IS_CIRC,
                                                           other_strat_value=1)
        df_un_circ, _ = extract_population_data_multiple_ages(fn,
                                                              node_id=node_id,
                                                              gender="Male",
                                                              age_bin_list=age_bin_list,
                                                              other_strat_column_name=COL_NAME_IS_CIRC,
                                                              other_strat_value=0)

        # The first file fixes the index and creates the zero-filled columns that
        # the averaging branch accumulates into.
        if len(combined_df.columns) == 0:
            combined_df.index = df_circ.index
            for column_name in df_circ.columns:
                name = column_name if show_avg_per_run else fn + "-" + column_name
                combined_df[name] = 0

        for column_name in df_circ.columns:
            fraction_circ = df_circ[column_name] / (df_circ[column_name] + df_un_circ[column_name])
            if show_avg_per_run:
                combined_df[column_name] = combined_df[column_name] + fraction_circ
            else:
                combined_df[fn + "-" + column_name] = fraction_circ

    if show_avg_per_run:
        # Turn the per-file sums into a mean.
        for column_name in combined_df.columns:
            combined_df[column_name] = combined_df[column_name] / len(dir_filenames)

    expected_df = None
    if show_expected:
        def _expected_plus_medical(year: float) -> float:
            """Return the expected value of the medical ramp for the given year."""
            if year < rtec_x1:
                return 0
            if year > rtec_x2:
                return rtec_y2
            return rtec_y1 + (year - rtec_x1) * (rtec_y2 - rtec_y1) / (rtec_x2 - rtec_x1)

        expected_df = pd.DataFrame()
        expected_df.index = combined_df.index
        expected_df["Expected Traditional"] = traditional_coverage_per_node[node_id - 1]

        years = combined_df.index.values.astype(float).tolist()
        expected_df["Expected Plus Medical"] = [_expected_plus_medical(year) for year in years]

    title = "Circumcised Men"
    if show_avg_per_run:
        title = title + " - Average"
    if node_id is not None:
        title = title + " - Node " + str(node_id)

    title2 = dir_or_filename if TEST_include_dir_or_filename else None

    xy_plot.xy_plot(img_dir=img_dir,
                    df=combined_df,
                    expected_df=expected_df,
                    title_1=title,
                    title_2=title2,
                    y_axis_name="Fraction of Circumcised Men",
                    fraction_of_total=False,
                    show_legend=show_avg_per_run,
                    show_markers=show_avg_per_run,
                    min_x=None, max_x=None, min_y=None, max_y=None,
                    x_axis_as_log_scale=False,
                    y_axis_as_log_scale=False)
[docs]
def plot_prevalence_for_dir(dir_or_filename: str,
                            exp_dir_or_filename: str = None,
                            node_id: int = None,
                            gender: str = None,
                            age_bin_list: list[float] = None,
                            show_avg_per_run: bool = False,
                            show_fraction: bool = False,
                            img_dir: str = None):
    """
    Plot the number (or fraction) of people who are infected with HIV.

    This is a thin wrapper that forwards to base_plot_by_age() using the
    infected column as the data and the title "Infected".

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        exp_dir_or_filename (str, optional):
            The expected or alternate directory or filename containing the
            ReportHIVByAgeAndGender.csv files.  Default is None.

        node_id (int, optional):
            The ID of the node for which the data is being filtered for.

        gender (str, optional):
            The string (Male or Female) for the gender that data is being filtered for.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the values at each time step
            between the files.  Default is False.

        show_fraction (bool, optional):
            True indicates that the number of infected people should be divided by the
            total number of people in that stratification.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a
            window will be opened.

    Returns:
        None - but image will be saved or window opened.
    """
    # Which report data to read and how to filter it.
    data_selection = {
        "dir_or_filename": dir_or_filename,
        "exp_dir_or_filename": exp_dir_or_filename,
        "node_id": node_id,
        "gender": gender,
        "age_bins": age_bin_list,
    }

    # What to plot; no "fraction of another column" is requested.
    presentation = {
        "base_title": "Infected",
        "main_column_name": COL_NAME_INFECTED,
        "show_avg_per_run": show_avg_per_run,
        "show_fraction": show_fraction,
        "fraction_of": False,
        "fraction_of_column_name": "",
        "fraction_of_str": "",
        "img_dir": img_dir,
    }

    base_plot_by_age(**presentation, **data_selection)
[docs]
def plot_risk_zambia(dir_or_filename: str,
                     age_bin_list: list[float] = None,
                     show_avg_per_run: bool = False,
                     show_fraction: bool = False,
                     show_expected: bool = False,
                     img_dir: str = None):
    """
    Create the risk value plots for Zambia, first for females and then for males.

    plot_risk() is called once per gender.  With 'show_expected' the Zambia specific
    expected values are passed along, which makes plot_risk() create one plot per
    node; without it no expected values are passed.

    Args:
        dir_or_filename (str, required):
            The directory or filename containing the ReportHIVByAgeAndGender.csv files.

        age_bin_list (list[float], optional):
            A list of ages in years that define the age bins.  For example,
            [10, 25, 30, 55] gives three bins: [10-25), [25-30), [30-55).

        show_avg_per_run (bool, optional):
            If 'dir_or_filename' is a directory, average the values at each time step
            between the files.  Default is False.

        show_fraction (bool, optional):
            True indicates that the data is not true counts but a fraction
            (i.e. a count divided by another count).

        show_expected (bool, optional):
            True indicates that you want to see the expected fractions of the population
            that have the different risk values PER NODE.  False will be data for all nodes.

        img_dir (str, optional):
            Directory to save the images.  If None, the images will not be saved and a
            window will be opened.

    Returns:
        None - but image will be saved or window opened.
    """
    # Expected fraction with Risk = HIGH per node (position + 1 is the node ID).
    # Only the first four nodes are active; the full ten-node lists were:
    #   Female: [0.125, 0.125, 0.125, 0.0500, 0.0500, 0.0500, 0.125, 0.0500, 0.125, 0.0500]
    #   Male:   [0.195, 0.195, 0.195, 0.0781, 0.0781, 0.0781, 0.195, 0.0781, 0.195, 0.0781]
    high_per_node_by_gender = {
        "Female": [0.125, 0.125, 0.125, 0.0500],
        "Male": [0.195, 0.195, 0.195, 0.0781],
    }

    # The starting distribution is the same for both genders.
    starting_expected_values = None
    if show_expected:
        starting_expected_values = {"LOW": 0.85, "MEDIUM": 0.15, "HIGH": 0.0}

    for gender, high_per_node in high_per_node_by_gender.items():
        plot_risk(dir_or_filename=dir_or_filename,
                  starting_expected_values=starting_expected_values,
                  expected_value_for_high_per_node=high_per_node if show_expected else None,
                  gender=gender,
                  age_bin_list=age_bin_list,
                  show_avg_per_run=show_avg_per_run,
                  show_fraction=show_fraction,
                  img_dir=img_dir)
if __name__ == '__main__':
    # Command line entry point: create one or more plots from
    # ReportHIVByAgeAndGender.csv files.  'summary' creates the population,
    # prevalence, vmmc, art and state plots in that order.
    import argparse

    def _print_notice(message: str) -> None:
        """Print 'message' framed by two banner lines of the same width."""
        banner = "=" * len(message)
        print(banner)
        print(message)
        print(banner)

    # Age bins used by each plot type when --ages is given.
    age_bins_by_plot = {
        "population": [0, 15, 25, 35, 45, 55, 100],
        "prevalence": [15, 25, 35, 45, 55],
        "risk": [15, 35, 55],
        "vmmc": [15, 20, 25, 30, 35, 40, 45, 50, 55],
        "art": [0, 15, 50, 75, 100],
        "state": [15, 30, 45],
    }

    # Plot types whose call below has no way to receive --expected, with the
    # name used in the notice that is printed when --expected is given anyway.
    no_expected_support = {
        "population": "Population",
        "prevalence": "Prevalence",
        "risk": "Risk",
        "vmmc": "VMMC",
        "art": "ART",
        "state": "State",
    }

    parser = argparse.ArgumentParser()
    parser.add_argument('dir_or_filename', type=str, nargs=1,
                        help='A directory with ReportHivByAgeAndGender.csv files or a single file.')
    parser.add_argument('exp_dir_or_filename', type=str, default=None, nargs='?',
                        help='A directory with ReportHivByAgeAndGender.csv files or a single file to compare to.')
    parser.add_argument('-p', '--plot', default='population',
                        help='Options: population, prevalence, risk, vmmc, art, state, column, summary')
    parser.add_argument('-o', '--output', default=None,
                        help='If provided, a directory will be created and images saved to the folder. If not provided, it opens windows.')
    parser.add_argument('-n', '--node_id', type=int, default=None,
                        help='Plot the data for a specific node.')
    parser.add_argument('-m', '--mean', action='store_true',
                        help='Gives the average/mean of each run at that time point.')
    parser.add_argument('-a', '--ages', action='store_true',
                        help='Show the data stratified by age.')
    parser.add_argument('-x', '--expected', action='store_true',
                        help='Show the expected data.')
    parser.add_argument('-c', '--column_name', default='Died',
                        help='Only used with plot=column. The name of a non-stratification column from the report. Do not include the space before the name.')

    args = parser.parse_args()

    dir_or_filename = args.dir_or_filename[0]  # nargs=1 yields a one-element list
    exp_dir_or_filename = args.exp_dir_or_filename
    node_id = args.node_id

    if args.plot == "summary":
        plot_list = ["population", "prevalence", "vmmc", "art", "state"]
    else:
        plot_list = [args.plot]

    for plot in plot_list:
        if args.expected and plot in no_expected_support:
            _print_notice(f"{no_expected_support[plot]} plot does not support expected data.")

        age_bin_list = age_bins_by_plot.get(plot) if args.ages else None

        if plot == "population":
            plot_population_by_age(dir_or_filename=dir_or_filename,
                                   exp_dir_or_filename=exp_dir_or_filename,
                                   node_id=node_id,
                                   gender=None,
                                   age_bin_list=age_bin_list,
                                   show_avg_per_run=args.mean,
                                   img_dir=args.output)
        elif plot == "prevalence":
            plot_prevalence_for_dir(dir_or_filename=dir_or_filename,
                                    exp_dir_or_filename=exp_dir_or_filename,
                                    node_id=node_id,
                                    gender=None,
                                    age_bin_list=age_bin_list,
                                    show_avg_per_run=args.mean,
                                    show_fraction=False,
                                    img_dir=args.output)
        elif plot == "risk":
            # NOTE: plot_risk_zambia() is the variant that can show expected
            # values; it is not used here, so --expected has no effect.
            plot_population_by_ip(dir_or_filename=dir_or_filename,
                                  exp_dir_or_filename=exp_dir_or_filename,
                                  node_id=node_id,
                                  ip_key="Risk",
                                  age_bin_list=age_bin_list,
                                  show_avg_per_run=args.mean,
                                  show_fraction=False,
                                  img_dir=args.output)
        elif plot == "vmmc":
            # NOTE: plot_vmmc_for_dir() is the variant that can show expected
            # values; it is not used here, so --expected has no effect.
            plot_vmmc_by_age(dir_or_filename=dir_or_filename,
                             exp_dir_or_filename=exp_dir_or_filename,
                             node_id=node_id,
                             age_bin_list=age_bin_list,
                             show_avg_per_run=args.mean,
                             img_dir=args.output)
        elif plot == "art":
            plot_onART_by_age(dir_or_filename=dir_or_filename,
                              exp_dir_or_filename=exp_dir_or_filename,
                              node_id=node_id,
                              gender=None,
                              age_bin_list=age_bin_list,
                              show_avg_per_run=args.mean,
                              show_fraction=True,
                              fraction_of_infected=True,
                              img_dir=args.output)
        elif plot == "state":
            plot_population_by_ip(dir_or_filename=dir_or_filename,
                                  exp_dir_or_filename=exp_dir_or_filename,
                                  node_id=node_id,
                                  ip_key="CascadeState",
                                  age_bin_list=age_bin_list,
                                  show_avg_per_run=args.mean,
                                  show_fraction=False,
                                  img_dir=args.output)
        elif plot == "column":
            # The report's column names start with a space, so it is added here.
            plot_columns(filename=dir_or_filename,
                         title=args.column_name,
                         y_axis_name="Count",
                         column_names=[" " + args.column_name],
                         img_dir=args.output)
        else:
            raise ValueError(f"Plot type '{args.plot}' is not supported.")