Archibald Prize
Contents
Archibald Prize¶
This is an exploratory data analysis of collected data from Art Gallery NSW among other external sources. We focus on the Archibald Prize and take a deep dive into temporal trends relating to gender, portrait characteristics and career paths. Data ranges over 100 years (1921-2022).
The data consists of…
participation records
prize money records
image data of winning portraits
basic biographical data for winners
Import packages and pre-process data¶
import pandas as pd
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
from matplotlib.ticker import StrMethodFormatter
from webcolors import CSS3_NAMES_TO_HEX
import seaborn as sns
sns.set(style='white', context='paper')
from os import listdir
from os.path import isfile, join
from PIL import Image
from PIL.ImageStat import Stat
import math
import requests
from bs4 import BeautifulSoup
from os.path import basename
########### 1. Collect data from the Art Gallery of NSW website ###########
# global mainURL
# mainURL = 'https://www.artgallery.nsw.gov.au/'
# def assort_prize_metadata(text):
# prize_dict = dict({'Entries':'',
# 'Presenting partner':'',
# 'Sponsor':'',
# 'Exhibition dates':'',
# 'Misc.':'',
# 'Text':''})
# for t in text:
# for k in list(prize_dict.keys())[:-2]:
# if k in t:
# if ' ' not in t: prize_dict[k] = t.strip().replace(k + ': ','')
# else:
# prize_dict[k] = t.split(' ')[0]
# prize_dict['Text'] = t.split(prize_dict[k])[1]
# break
# if prize_dict['Text'] == '':
# prize_dict['Misc.'] = text[-1].split(' ')[0]
# if len(prize_dict['Misc.']):
# prize_dict['Text'] = text[-1].split(prize_dict['Misc.'])[1]
# return prize_dict
# def collect_records(prize = 'archibald', prize_year = 1921):
# prize_url = mainURL + "prizes/" + prize + '/' + str(prize_year)
# page = requests.get(prize_url)
# soup = BeautifulSoup(page.content, "html.parser")
# # fetch winner data
# try:
# winner_artist = soup.find_all("span", class_="card-prizesWinner-artist")[0].text
# winner_title = soup.find_all("span", class_="card-prizesWinner-title")[0].text
# try:
# winner_image = soup.find_all("img", class_="card-prizesWinner-image")[0].get('src')
# with open('ArchibaldWinners/' + str(yr) + '_' + basename(mainURL + winner_image), "wb") as f:
# f.write(requests.get(mainURL + winner_image).content)
# except: winner_image = None
# winner_info = [winner_artist,winner_title,winner_image]
# except:
# winner_info = [None,None,None]
# # download winning image
# # with open(basename(winner_image),"wb") as f: f.write(requests.get(mainURL + winner_image).content)
# # pre-process
# delimiter = '###' # unambiguous string
# for line_break in soup.findAll('br'): # loop through line break tags
# line_break.replaceWith(delimiter) # replace br tags with delimiter
# textModule = soup.find("div", class_="grid text").get_text().split(delimiter) # get list of strings
# # fetch prize metadata
# prize_metadata_dict = assort_prize_metadata(text=textModule)
# prize_metadata_dict['winner_info'] = winner_info
# # fetch participant data
# participants = []
# if len(soup.find_all("div", class_="grid text")) > 1:
# for item in soup.find_all("div", class_="grid text")[1].find_all('ul')[0].find_all('li'):
# try: participant_href = item.find_all("a")[0].get('href')
# except: participant_href = ''
# participant_artist = item.find_all("strong")[0].text
# participant_title = item.find_all("em")[0].text
# try: participant_label = item.text.split(participant_title)[-1].strip()
# except: participant_label = ''
# participants.append([participant_href, participant_artist, participant_title, participant_label])
# else:
# for item in soup.find_all("div", class_="artworksList-item"):
# participant_href = item.find_all("a", class_="card-artwork-link")[0].get('href')
# participant_artist = item.find_all("span", class_="card-artwork-artist")[0].text
# participant_title = item.find_all("span", class_="card-artwork-title")[0].text
# participant_label = item.find_all("p", class_="card-artwork-label")[0].text
# participants.append([participant_href, participant_artist, participant_title, participant_label])
# prize_metadata_dict['participant_info'] = participants
# return prize_metadata_dict
# archibald_data_dict = dict({'Prize Data':[],'Year':[]})
# # pre 1991/92
# for yr in range(1921,1991):
# try: archibald_data_dict['Prize Data'].append(
# collect_records(prize = 'archibald', prize_year = yr))
# except: archibald_data_dict['Prize Data'].append(None)
# archibald_data_dict['Year'].append(yr)
# # 1991/92 exception
# try: archibald_data_dict['Prize Data'].append(
# collect_records(prize = 'archibald', prize_year = '1991-92'))
# except: archibald_data_dict['Prize Data'].append(None)
# archibald_data_dict['Year'].append('1992')
# # post 1991/92
# for yr in range(1993,2023):
# try: archibald_data_dict['Prize Data'].append(
# collect_records(prize = 'archibald', prize_year = yr))
# except: archibald_data_dict['Prize Data'].append(None)
# archibald_data_dict['Year'].append(yr)
########### Convert dictionary as dataframe and write as csv file ###########
# archies = pd.DataFrame(archibald_data_dict)
# archies.to_csv('data/archies.csv', index=False)
########### Read csv file as dataframe ###########
# This imported dataset was further pre-processed by filtering on winners
# and adding columns for each winner's biographical information,
# along with the corresponding ANZSCO classification data.
archies = pd.read_csv('data/archies_v2.csv')
# Show the first three rows transposed, so that each column of the
# dataset reads as one row of the preview.
archies.head(3).T
0 | 1 | 2 | |
---|---|---|---|
YEAR | 1921 | 1922 | 1923 |
WINNER | W B McInnes | W B McInnes | W B McInnes |
GENDER | Male | Male | Male |
DOB | 1889.0 | 1889.0 | 1889.0 |
DOD | 1939.0 | 1939.0 | 1939.0 |
Unnamed: 5 | 32 | 33 | 34 |
PORTRAIT TITLE | Desbrowe Annear | Professor Harrison Moore | Portrait of a lady |
Sitter | Harold Desbrowe-Annear | William Harrison Moore | Violet McInnes |
DOB.1 | 1865.0 | 1867.0 | 1892.0 |
Sitter Age | 56.0 | 55.0 | 31.0 |
Self | 0 | 0 | 0 |
PORTRAIT GENDER | Male | Male | Female |
PORTRAIT OCC (Copy/Paste) | NaN | constitutional lawyer and dean of the law facu... | wife, Violet McInnes |
OCC. CATEGORY (1) | Architect | Professor | Wife |
OCC. CATEGORY (2) | Architect | Professor | Person |
ANZSCO_1 | Design, Engineering, Science and Transport Pro... | Education Professionals | NaN |
ANZSCO_2 | Professionals | Professionals | NaN |
Comments | With his (McInnes) wife, fellow artist Violet ... | New rules were added to the competition this y... | WB McInnes’s Portrait of a lady – his third wi... |
Gender distribution¶
Male and female distribution for Archibald winners¶
We use a donut chart to explore how gender has been recorded for Archibald winners; 88% of the data has been recorded as Male and 12% as Female.
It should be noted that for three years (1964, 1980 and 1991), there were no Archibald prize winners.
## Gender Proportion
# Tally the recorded gender of every winner, in first-seen order.
df_gender = pd.DataFrame(dict(Counter(archies["GENDER"])).items(),
                         columns=["Gender", "Frequency"])
# Keep only rows with a recorded gender (presumably the missing entries are
# the years without a winner -- confirm against the dataset).
recorded_gender = df_gender[df_gender["Gender"].notnull()]

# explosion: one offset per wedge, so the chart still renders if the number
# of recorded categories is not exactly two
explode = (0.05,) * len(recorded_gender)

# Pie Chart: wedge labels are read from the data rather than hard-coded, so a
# label can never be attached to the wrong wedge.
plt.pie(recorded_gender['Frequency'], labels=recorded_gender['Gender'],
        autopct='%1.1f%%', pctdistance=0.85,
        explode=explode)

# draw circle
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig = plt.gcf()

# Adding Circle in Pie chart (turns the pie into a donut)
fig.gca().add_artist(centre_circle)

# Adding Title of chart
plt.title('Gender proportion')

# Displaying Chart
plt.show()
Male and female distribution of sitters for winning Archibald portraits¶
Beyond the winning painter, we also assess the gender distribution of the sitters within the winning portraits. Again we use a donut chart to explore the distribution. According to data collected from various online sources, we found that 82% of sitters were recorded as Male, and 18% as Female.
## Gender Proportion
# Tally the recorded gender of every sitter, in first-seen order.
df_gender = pd.DataFrame(dict(Counter(archies["PORTRAIT GENDER"])).items(),
                         columns=["Gender", "Frequency"])
# Keep only rows with a recorded gender.
recorded_gender = df_gender[df_gender["Gender"].notnull()]

# explosion: one offset per wedge, so the chart still renders if the number
# of recorded categories is not exactly two
explode = (0.05,) * len(recorded_gender)

# plt.rcParams.update(plt.rcParamsDefault)
# # plt.rcParams['font.size'] = 14
# # plt.rcParams['text.color'] = 'white'

# Pie Chart: wedge labels are read from the data rather than hard-coded, so a
# label can never be attached to the wrong wedge.
patches, texts, autotexts = plt.pie(recorded_gender['Frequency'],
                                    labels=recorded_gender['Gender'],
                                    autopct='%1.1f%%', pctdistance=0.815,  # textprops={'color':"w", 'fontsize':13},
                                    explode=explode)

# Enlarge every category label (not just the first two).
for text in texts:
    text.set_fontsize(14)

# Percentages are drawn on top of the wedges, so render them in white.
for autotext in autotexts:
    autotext.set_color('white')
    autotext.set_fontsize(13)

# draw circle
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig = plt.gcf()

# Adding Circle in Pie chart (turns the pie into a donut)
fig.gca().add_artist(centre_circle)

# Adding Title of chart
plt.title('Gender proportion for subjects', fontsize=15)

# Displaying Chart
plt.show()
# fig.savefig('subject_genders.png', dpi=330)
Do males paint males?¶
We also consider the gender distribution of sitters by male and female Archibald winners. The clustered bar chart shows that 86% of winning portraits painted by males consisted of male sitters. This differs quite a bit to winning portraits painted by females, which consists of an even distribution (50% male sitters, 50% female sitters). It should be noted that there are 12 winning portraits painted by females.
# Row-normalised crosstab: for each artist gender, the share of sitters
# of each gender.
df_crosstab = pd.crosstab(index=archies['GENDER'],
                          columns=archies['PORTRAIT GENDER'],
                          normalize='index')

# Long ("tidy") form of the same table: one row per (artist, sitter) pair.
df_tidy = df_crosstab.stack().reset_index()
df_tidy.columns = ['Artist Gender', 'Subject gender', 'Proportion']

# One bar-chart facet per artist gender.
g = sns.FacetGrid(df_tidy, col="Artist Gender")
g.map(sns.barplot, "Subject gender", "Proportion",
      order=["Female", "Male"], palette=['#EC7E45', '#4C72B0'])
g.set_titles(col_template="{col_name} artists", size=12)

# Leave headroom above the bars for the percentage labels.
g.set(ylim=(0, 1.1))

# The bars are labelled directly, so hide the y-axis ticks and labels.
g.set(yticks=[])
g.set(yticklabels=[])
g.set(ylabel=None)

# Write the rounded percentage just above each bar.
for facet_ax in g.axes.flat:
    for bar in facet_ax.patches:
        bar_height = bar.get_height()
        facet_ax.annotate(f'{bar_height:.0%}',
                          (bar.get_x() + 0.3, bar_height + 0.025),
                          size=12)

# Enlarge the figure.
g.fig.set_size_inches(6.5, 4.5)
plt.show()
# g.savefig('subject_genders_by_artist_gender.png', dpi=330)
# ax = pd.crosstab(archies['GENDER'], archies['PORTRAIT GENDER'], normalize='index')\
# .plot(kind='bar', rot=0)
# # Get bar heights for each bar in the plot
# bar_heights = [p.get_height() for p in ax.patches]
# # For each bar, add the label with rounded value
# for i, b in enumerate(ax.patches):
# # if i < 2:
# # b.set_color('#1f77b4')
# # else:
# # b.set_color('#ff7f0e')
# ax.text(b.get_x() + b.get_width()/2,
# b.get_height() + 0.01,
# str(round(round(bar_heights[i], 2)*100,2)) + '%',
# ha='center', size=12)
# # increase ylim
# ax.set_ylim(0,1)
# # remove y-axis ticks and labels
# ax.set_yticks([])
# ax.set_yticklabels([])
# ax.yaxis.label.set_visible(False)
# # remove x-axis title
# ax.set_xlabel('Artist gender')
# # increase x-axis labels font size
# ax.tick_params(axis='x', labelsize=11)
# # increae x-axis title font size
# ax.xaxis.label.set_fontsize(12)
# # set legend title
# ax.legend(title='', loc='upper left', ncol=1)
# # increase legend font sie
# ax.get_legend().get_texts()[0].set_fontsize('12')
# ax.get_legend().get_texts()[1].set_fontsize('12')
# # change labels in legend
# ax.get_legend().get_texts()[0].set_text('Male subject')
# ax.get_legend().get_texts()[1].set_text('Female subject')
# # make first two bars darker
# ax.patches[0].set_color('#819CC7')
# ax.patches[2].set_color('#EAAF8E')
# ax.patches[1].set_color('#4C72B0')
# ax.patches[3].set_color('#EC7E45')
# plt.show()
Across winning portraits, a sitter is 36 percentage points more likely to be female when the painter is female (50% versus 14%).
Male and female distribution over time¶
The two time series visualisations below showcase the proportion of male and female Archibald winners and sitters across twenty-year brackets. The data for Archibald winners reveals that only in recent decades have females won a noticeably higher proportion of Archibald prizes than in earlier vicennia. The trend for sitters follows a similar pattern to the Archibald winners time series. Following our previous insights, this suggests that as more female artists win Archibalds, there is a corresponding increase in the number of female sitters being painted.
### create a new column for the year of the vicennium
# Floor every year to the first year of its 20-year bracket (1921 -> 1920).
archies['year_vicennium'] = (archies['YEAR'].astype(int) // 20) * 20
# Fold the years from 2020 onwards into the open-ended "2000-" bracket.
archies['year_vicennium'] = np.where(archies['year_vicennium'] == 2020, 2000, archies['year_vicennium'])


def _count_by_vicennium(column, gender):
    """Count the rows of ``archies`` per vicennium where ``column == gender``.

    Returns a frame with the columns ``index`` (first year of the vicennium)
    and ``year_vicennium`` (number of rows), sorted by vicennium.  The column
    names are set explicitly because the names produced by
    ``value_counts().reset_index()`` differ between pandas 1.x and 2.x.
    """
    counts = archies.loc[archies[column] == gender, 'year_vicennium'].value_counts().sort_index()
    return counts.rename_axis('index').reset_index(name='year_vicennium')


def _merge_gender_counts(males, females):
    """Combine male/female count tables and add rounded row proportions."""
    table = pd.merge(males, females, on='index', how='outer').fillna(0)
    table.columns = ['Vicennium', 'Males', 'Females']
    # Sort explicitly so the rows are chronological whatever order the
    # outer merge returns them in.
    table = table.sort_values('Vicennium').reset_index(drop=True)
    total = table['Females'] + table['Males']
    table['Females_Prop'] = (table['Females'] / total).round(2)
    table['Males_Prop'] = (table['Males'] / total).round(2)
    return table


def _plot_gender_proportions(table, title, female_label_scale):
    """Draw the male/female proportion lines of ``table`` with % labels.

    ``female_label_scale`` is the multiplicative vertical offset applied to
    the labels of the female line (the male line always uses 1.035).
    Returns the ``(fig, ax)`` pair of the new figure.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(table['Vicennium'], table['Males_Prop'], label="Males", marker='o')
    ax.plot(table['Vicennium'], table['Females_Prop'], label="Females", marker='o')

    # Label every point with its percentage, nudged slightly above the marker.
    for prop_column, scale in (('Males_Prop', 1.035), ('Females_Prop', female_label_scale)):
        for vicennium, prop in zip(table['Vicennium'], table[prop_column]):
            ax.annotate(str(int(round(prop * 100, 0))) + '%', (vicennium, prop * scale),
                        ha='center', va='bottom', size=12.5)

    # adjust legend
    ax.legend(loc="upper right", ncol=2)
    # The points are labelled directly, so the y-axis carries no information.
    ax.yaxis.set_ticklabels([])
    ax.yaxis.set_ticks([])
    plt.xlabel('')
    plt.ylim([-0.1, 1.23])
    plt.grid(axis='x')
    plt.xticks([1920, 1940, 1960, 1980, 2000],
               ['1920-1940', '1940-1960', '1960-1980', '1980-2000', '2000-'])
    plt.title(title)
    plt.show()
    return fig, ax


### get count by gender
males_tab = _count_by_vicennium('GENDER', 'Male')
females_tab = _count_by_vicennium('GENDER', 'Female')
males_sitters_tab = _count_by_vicennium('PORTRAIT GENDER', 'Male')
females_sitters_tab = _count_by_vicennium('PORTRAIT GENDER', 'Female')

### merge tables and get row proportions for Males and Females
count_by_gender = _merge_gender_counts(males_tab, females_tab)
count_by_gender_sitter = _merge_gender_counts(males_sitters_tab, females_sitters_tab)

### plot gender proportions of winners over time
fig, ax = _plot_gender_proportions(
    count_by_gender,
    'Proportion of Archibald winners,\nMales and Females, 20-year periods',
    female_label_scale=1.1)

### plot gender proportions of sitters over time
fig, ax = _plot_gender_proportions(
    count_by_gender_sitter,
    'Proportion of sitters,\nMales and Females, 20-year periods',
    female_label_scale=1.09)
Winning age for Archibald winners¶
We use a histogram chart to explore the distribution of winning age. The histogram exhibits a relatively bi-modal shape with some painters winning the Archibald prize much later in their career. However, the majority cluster around the mid-40s.
The youngest painter to win the Archibald Prize was Nora Heysen at the age of 27 years (1938), and the oldest was John Olsen, winning at the age of 77 years (2005).
Furthermore, we calculate the median winning age by gender of winning painter, and found that males (45) on average win later than females (39).
def upper_rugplot(data, height=.05, ax=None, **kwargs):
    """Draw a rug of short vertical ticks hanging from the top of an axes.

    One tick is drawn at each x position in ``data``.  Because the line
    collection uses the x-axis transform, x is in data coordinates and y is
    in axes coordinates, so each tick runs from y = 1 down to y = 1 - height.

    ``ax`` falls back to the current axes when not given; remaining keyword
    arguments are forwarded to ``LineCollection`` (``linewidth`` defaults to 1).
    """
    from matplotlib.collections import LineCollection

    target_ax = ax if ax else plt.gca()
    kwargs.setdefault("linewidth", 1)

    # Each segment is [[x, 1], [x, 1 - height]].
    x_pairs = np.c_[data, data]
    top = np.ones_like(data)
    y_pairs = np.c_[top, top - height]
    segments = np.stack((x_pairs, y_pairs), axis=-1)

    ticks = LineCollection(segments,
                           transform=target_ax.get_xaxis_transform(),
                           **kwargs)
    target_ax.add_collection(ticks)
# Age of the painter in the year of the win.
archies['winning_age'] = archies['YEAR'].sub(archies['DOB'])
# print(pd.DataFrame(archies.winning_age.describe()).T,'')
# print(pd.DataFrame(archies[archies['GENDER'] == 'Male']['winning_age'].describe()).T)
# print(pd.DataFrame(archies[archies['GENDER'] == 'Female']['winning_age'].describe()).T)

# Smoothed density of winning age, with a rug of the individual ages
# hanging from the top edge of the plot.
age_ax = sns.kdeplot(archies['winning_age'], fill=True)
upper_rugplot(archies['winning_age'], height=.05, alpha=.8)
age_ax.set_title('Distribution of winning age, Median = 44')
age_ax.set_ylim([0, 0.04])
age_ax.set_xlabel('Winning Age')
plt.show()
Winning age by year¶
The first line plot below shows the age of Archibald winners per year. At first glance, the winning age appears to fluctuate randomly, but there are some observable patterns prior to 1960. Upon closer examination, we discover that these gradual changes are the result of the same individuals winning the Archibald Prize multiple times.
We list five of the most frequent Archibald winners - all of which have more than three prizes.
Artist |
Number of Archibald prizes |
---|---|
William Dargie |
8 |
W B McInnes |
7 |
John Longstaff |
5 |
Ivor Hele |
5 |
William Pidgeon |
4 |
The second line plot emphasises these five artists, highlighting some interesting insights.
The first 41 years of the Archibald prize were dominated by these multi-winners, who specifically won more than two thirds (68.3%) of the Archibald prizes awarded
W B McInnes and John Longstaff dominated the 1920-1940 period, collectively winning 12 out of 19 Archibalds
William Dargie and Ivor Hele dominated the 1940-1960 period, collectively winning 13 out of 20 Archibalds
We see the prize spread across many more painters in recent decades, with fewer occurrences of repeat winners.
### plot winning age by year
fig, ax = plt.subplots(figsize=(10, 6))
win_years, win_ages = archies['YEAR'], archies['winning_age']
# Faint connecting line plus a solid marker for every win.
ax.plot(win_years, win_ages, alpha=0.35)
ax.plot(win_years, win_ages, marker='o', linestyle='', color='tab:blue')
# Reference line at the overall median winning age.
ax.axhline(y=44, color='red', linestyle='--', lw=1.5, alpha=0.3)
ax.set_ylim([20, 90])
ax.set_title('Age at time of Archibald Prize win by year, Median = 44')
plt.show()

############################################
### plot winning age by year and highlight multi-winners
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(archies['YEAR'], archies['winning_age'], alpha=0.35)
ax.axhline(y=44, color='red', linestyle='--', lw=1.5, alpha=0.3)

# Marker colour for each highlighted artist; dict order is the legend order.
highlight_colours = {
    'William Dargie': 'tab:orange',
    'W B McInnes': 'tab:purple',
    'John Longstaff': 'tab:pink',
    'Ivor Hele': 'tab:green',
    'William Pidgeon': 'tab:red',
}
for artist, colour in highlight_colours.items():
    artist_wins = archies[archies['WINNER'] == artist]
    ax.plot(artist_wins['YEAR'], artist_wins['winning_age'],
            marker='o', linestyle='', color=colour, label=artist)

# Every row whose winner is not one of the highlighted artists.
cond_rest = ~archies['WINNER'].isin(list(highlight_colours))
ax.plot(archies[cond_rest]['YEAR'], archies[cond_rest]['winning_age'],
        marker='o', linestyle='', color='tab:blue', label='Rest of winners')

# adjust legend
ax.legend(loc="upper right", ncol=3)
# The trailing newlines in the title leave room for the subtitle below it.
ax.set_title('Age at time of Archibald Prize win by year,\n\n')
# add subtitle
ax.text(0.5, 1.05, 'Artists who have won the Archibald Prize more than thrice are highlighted',
        horizontalalignment='center', verticalalignment='center',
        transform=ax.transAxes, fontsize=10)
ax.set_ylim([20, 90])
plt.show()
Winning age for Archibald winners (cont.)¶
To consider multi-winners, we assess the median winning age at different milestones in relation to the Archibald Prize (1st win, 2nd win, etc.). The bar plot shows a similar median (43.5) for first-time winners (highlighted in orange) when compared with the overall median (44). This is likely due to the fact that most artists have only won the prize once (62 artists).
When considering second wins, the average winning age increases to 48.5, but then decreases for subsequent wins. This pattern may be a result of small sample sizes, but also suggests that multi-winners tend to experience early success. The only exception is John Longstaff, who won all his prizes after the age of 64.
Interestingly, William Dargie was 44 years old when he won his eighth and final Archibald Prize, which is the same as the overall median winning age.
### plot winning age at different milestones
archies['count'] = 0

# create count for each artist: walking the rows in chronological order,
# 'count' records which win (1st, 2nd, ...) the row is for its winner.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
winner_count_dict = dict()
for idx, winner in archies.sort_values('YEAR')['WINNER'].items():
    winner_count_dict[winner] = winner_count_dict.get(winner, 0) + 1
    archies.loc[idx, 'count'] = winner_count_dict[winner]

# Median winning age at the 1st .. 8th win.
x = [1, 2, 3, 4, 5, 6, 7, 8]
y = [archies[archies['count'] == nth_win]['winning_age'].median() for nth_win in x]

fig, ax = plt.subplots()
ax.bar(x, y)
# Re-draw the first bar in orange to highlight first-time wins.
ax.bar(x[0], y[0], color='tab:orange')
ax.set_xlabel('Archibald Prize wins')
# The trailing newlines in the title leave room for the subtitle below it.
ax.set_title('Median winning age at different milestones,\n\n')

# add subtitle
plt.text(0.5, 1.05, 'Frequency of winners highlighted in white',
         horizontalalignment='center', verticalalignment='center',
         transform=ax.transAxes, fontsize=10)
plt.ylim([0, 55])

# Rows without a winning age are left out of the frequency labels.
omit_nowins = (~archies.winning_age.isnull())
for i, v in enumerate(y):
    # Median age above the bar, number of wins at that milestone inside it.
    ax.annotate(str(v), (i+1, v*1.005), ha='center', va='bottom', size=11)
    ax.annotate(archies[(archies['count'] == i+1) & omit_nowins].shape[0],
                (i+1, 2), ha='center', va='bottom', size=11, color='white')
plt.show()
/var/folders/rb/mjsh2q916fl5sgghntjck66h0000gn/T/ipykernel_49840/2270394203.py:6: FutureWarning: iteritems is deprecated and will be removed in a future version. Use .items instead.
for idx,row in archies.sort_values('YEAR')['WINNER'].iteritems():
# def upper_rugplot(data, height=.05, ax=None, **kwargs):
# from matplotlib.collections import LineCollection
# ax = ax or plt.gca()
# kwargs.setdefault("linewidth", 1)
# segs = np.stack((np.c_[data, data],
# np.c_[np.ones_like(data), np.ones_like(data)-height]),
# axis=-1)
# lc = LineCollection(segs, transform=ax.get_xaxis_transform(), **kwargs)
# ax.add_collection(lc)
# sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0), 'axes.linewidth':2})
# palette = sns.color_palette("Paired", 8)
# archies_density = archies[archies['count'] < 9].copy()
# archies_density['count_verbose'] = np.where(archies_density['count'] == 1, '1st win', np.nan)
# archies_density['count_verbose'] = np.where(archies_density['count'] == 2, '2nd win', archies_density['count_verbose'])
# archies_density['count_verbose'] = np.where(archies_density['count'] == 3, '3rd win', archies_density['count_verbose'])
# archies_density['count_verbose'] = np.where(archies_density['count'] == 4, '4th win', archies_density['count_verbose'])
# archies_density['count_verbose'] = np.where(archies_density['count'] == 5, '5th win', archies_density['count_verbose'])
# archies_density['count_verbose'] = pd.Categorical(archies_density['count_verbose'],
# categories=['5th win','4th win','3rd win','2nd win','1st win'], ordered=True)
# g = sns.FacetGrid(archies_density, palette=palette, row="count_verbose", hue="count_verbose", aspect=8, height=1.2)
# g.map_dataframe(sns.kdeplot, x="winning_age", fill=True, alpha=0.9)
# g.map_dataframe(sns.kdeplot, x="winning_age", color='black')
# upper_rugplot(archies_density[archies_density['count'] == 8]['winning_age'], color=palette[7], linewidth=2.75, height=0.24, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.21, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 7]['winning_age'], color=palette[6], linewidth=2.75, height=0.21, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.18, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 6]['winning_age'], color=palette[5], linewidth=2.75, height=0.18, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.15, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 5]['winning_age'], color=palette[0], linewidth=2.75, height=0.15, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.12, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 4]['winning_age'], color=palette[1], linewidth=2.75, height=.12, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.09, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 3]['winning_age'], color=palette[2], linewidth=2.75, height=.09, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.06, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 2]['winning_age'], color=palette[3], linewidth=2.75, height=.06, ax=g.axes[1,0])
# upper_rugplot(range(25,85), color='white', linewidth=3, height=.03, ax=g.axes[1,0])
# upper_rugplot(archies_density[archies_density['count'] == 1]['winning_age'], color=palette[4], linewidth=2.75, height=.03, ax=g.axes[1,0])
# def label(x, color, label):
# ax = plt.gca()
# ax.text(0.9, .1, label, color=color, fontsize=13,
# ha="left", va="center", transform=ax.transAxes)
# g.map(label, "count_verbose")
# g.fig.subplots_adjust(hspace=-0.8)
# g.set_titles("")
# g.set(yticks=[], xlabel="", ylabel="", ylim=[0, 0.045])
# g.despine( left=True)
# plt.suptitle('Distribution of winning age at different milestones', x=0.52, y=0.9)
# plt.show()
# Display the pre-rendered stacked-density figure from disk at 800 px width.
# The bare `Image(...)` expression must stay the last statement of the cell:
# its value is what the notebook renders.
# NOTE(review): this import rebinds `Image`, which the first cell bound to
# PIL's Image module (`from PIL import Image`) — confirm nothing executed
# later still needs `Image.open`.
from IPython.display import Image
Image(filename='images/StackedDensity.png', width=800)
By the age at which the average winner claims their first Archibald Prize, William Dargie had already secured his eighth Archibald win.
Winning age for Archibald winners by vicennium¶
By analysing the winning age data by milestone and vicennium (20-year period), we can observe that the typical winning age for first-time winners has fluctuated over time. During the 1920-1940 period, the median winning age for first-time winners was 35. This median rose to 46.5 over the next forty years and then dropped back to 40 in the 2000s. A similar pattern was observed for second-time winners, with a peak median of 60.5 in the 1980-2000 period.
As illustrated in previous visualisations, third-time winners and beyond tend to occur more often in earlier decades. The last artist to win three Archibald prizes was Eric John Smith in 1982 at the age of 63.
fig, ax = plt.subplots(figsize=(9, 5))

# One line per win milestone: the *median* winning age inside each 20-year
# period (vicennium). Rows are selected by the running win counter `count`.
_milestones = [
    (archies['count'] == 1, '1st win'),
    (archies['count'] == 2, '2nd win'),
    (archies['count'] == 3, '3rd win'),
    (archies['count'] > 3, '4th win & \nbeyond'),
]
for _mask, _label in _milestones:
    _median_age = (
        archies[_mask]
        .groupby('year_vicennium')['winning_age']
        .median()
        .reset_index()
    )
    _median_age.plot(x='year_vicennium', y='winning_age', marker='o',
                     ax=ax, label=_label)

ax.set(xlabel="Vicennium", ylabel="")
plt.grid(axis='x')

# Relabel the period start years as ranges.
plt.xticks([1920, 1940, 1960, 1980, 2000],
           ['1920-1940', '1940-1960', '1960-1980', '1980-2000', '2000-'])

# plt.title('Average winning age by $\it{n}$th win, 20-year periods')
plt.title('Average winning age by nth win, 20-year periods')

# Single-column legend in the top-right corner.
plt.legend(facecolor='white', loc='upper right', ncol=1)
plt.show()
Colour and Brightness¶
Colour over time¶
# # Takes 3 minutes to run
# import matplotlib.patches as patches
# import matplotlib.image as mpimg
# from PIL import Image
# from matplotlib.offsetbox import OffsetImage, AnnotationBbox
# # !pip install easydev #version 0.12.0
# # !pip install colormap #version 1.0.4
# # !pip install opencv-python #version 4.5.5.64
# # !pip install colorgram.py #version 1.2.0
# # !pip install extcolors #version 1.0.0
# # !pip install colormath #version 3.0.0
# # !pip install webcolors #version 1.11.1
# import cv2
# import extcolors
# from colormap import rgb2hex
# from colormath.color_objects import sRGBColor, LabColor
# from colormath.color_conversions import convert_color
# from colormath.color_diff import delta_e_cie2000
# import webcolors
# def get_closest_color(requested_color, color_map):
# requested_color = sRGBColor(*requested_color)
# requested_color = convert_color(requested_color, LabColor)
# min_distance = float("inf")
# closest_color = None
# for color_name, color_rgb in color_map.items():
# color = sRGBColor(*color_rgb)
# color = convert_color(color, LabColor)
# distance = delta_e_cie2000(requested_color, color)
# if distance < min_distance:
# min_distance = distance
# closest_color = color_name
# return closest_color
# color_map = {color_name: webcolors.name_to_rgb(color_name) for color_name in webcolors.CSS3_NAMES_TO_HEX.keys()}
# from os import listdir
# from os.path import isfile, join
# onlyfiles = [f for f in listdir('./images/ArchibaldWinners') if isfile(join('./images/ArchibaldWinners', f))]
# def color_to_df(input):
# colors_pre_list = str(input).replace('([(','').split(', (')[0:-1]
# df_rgb = [i.split('), ')[0] + ')' for i in colors_pre_list]
# df_percent = [i.split('), ')[1].replace(')','') for i in colors_pre_list]
# #convert RGB to HEX code
# df_color_up = [rgb2hex(int(i.split(", ")[0].replace("(","")),
# int(i.split(", ")[1]),
# int(i.split(", ")[2].replace(")",""))) for i in df_rgb]
# df = pd.DataFrame(zip(df_color_up, df_percent), columns = ['c_code','occurence'])
# return df
# df_colors = pd.DataFrame(columns = ['c_code','occurence'])
# onlyfiles.sort()
# for f in onlyfiles:
# colors_x = extcolors.extract_from_path('./images/ArchibaldWinners/' + f,
# tolerance = 12, limit = 25)
# df_color = color_to_df(colors_x)
# df_color['proportion'] = df_color['occurence'].astype(float) / df_color['occurence'].astype(float).sum()
# df_color['rank'] = df_color['proportion'].rank(ascending=False)
# df_color['color_name'] = df_color.c_code.\
# apply(lambda x: get_closest_color(webcolors.hex_to_rgb(x), color_map))
# df_color['year'] = f[:4]
# # df_colors = df_colors.append(df_color, ignore_index=True)
# df_colors = pd.concat([df_colors, df_color], ignore_index=True)
# df_colors.to_csv('data/Archibald_colors.csv', index=False)
# Load the extracted colour data for every Archibald winning portrait.
df_colors = pd.read_csv('data/Archibald_colors.csv')

# Bucket each year into the 20-year period (vicennium) it falls in, labelled
# by the period's first year; the short 2020+ bucket is folded into 2000.
df_colors['year_vicennium'] = (
    df_colors['year']
    .astype(int)
    .apply(lambda yr: yr - yr % 20)
    .replace(2020, 2000)
)

# Number of distinct years that have colour rows in each period.
len_20_cols, len_40_cols, len_60_cols, len_80_cols, len_00_cols = [
    df_colors.loc[df_colors['year_vicennium'] == _period, 'year'].nunique()
    for _period in (1920, 1940, 1960, 1980, 2000)
]

# `proportion2` rescales each row's within-portrait share by the number of
# years in its period, so summing it over a period gives a per-period share.
# Rows whose period is not one of the five listed stay NaN.
_years_in_period = df_colors['year_vicennium'].map({
    1920: len_20_cols,
    1940: len_40_cols,
    1960: len_60_cols,
    1980: len_80_cols,
    2000: len_00_cols,
})
df_colors['proportion2'] = df_colors['proportion'] / _years_in_period
# Draw one stacked-area chart per 20-year period: the five dominant colours of
# that period, followed across the whole timeline.
for y in df_colors['year_vicennium'].unique():
    # The five colours with the largest summed share inside period `y`.
    top5cols = (
        df_colors[df_colors.year_vicennium == y]
        .groupby(['year_vicennium', 'color_name'])
        .agg({'proportion2': 'sum'})
        .reset_index()
        .sort_values(['year_vicennium', 'proportion2'], ascending=[True, False])
        .groupby('year_vicennium')
        .head(5)['color_name']
        .unique()
    )

    # Summed share of those same colours in every period. The explicit copy
    # makes this an independent frame, so the column assignments below never
    # touch (or warn about) a selection of another frame.
    df_colors_top5 = (
        df_colors[df_colors.color_name.isin(top5cols)]
        .groupby(['year_vicennium', 'color_name'])
        .agg({'proportion2': 'sum'})
        .reset_index()
        .sort_values(['year_vicennium', 'proportion2'], ascending=[True, False])
        .groupby('year_vicennium')
        .head(5)
        .copy()
    )

    # Wide layout for plotting: one row per period, one column per colour;
    # a colour absent from a period counts as 0.
    df_colors_top5_pivot = (
        df_colors_top5
        .pivot(index='year_vicennium', columns='color_name', values='proportion2')
        .fillna(0)
    )

    # Split each colour's CSS3 hex code ('#rrggbb') into integer channels.
    for _channel, _digits in (('red', slice(1, 3)),
                              ('green', slice(3, 5)),
                              ('blue', slice(5, None))):
        df_colors_top5[_channel] = df_colors_top5['color_name'].apply(
            lambda name, _digits=_digits: int(CSS3_NAMES_TO_HEX[name][_digits], 16))

    # Stack the colours in ascending (red, green, blue) order.
    ordered_columns = (
        df_colors_top5
        .sort_values(['red', 'green', 'blue'], ascending=[True, True, True])['color_name']
        .unique()
    )
    df_colors_top5_pivot = df_colors_top5_pivot.reindex(columns=ordered_columns)

    # Each area is painted in the colour it represents (column name = CSS name).
    df_colors_top5_pivot.plot.area(stacked=True, figsize=(10, 5),
                                   color=df_colors_top5_pivot.columns,
                                   alpha=0.825)

    # Annotate every period with the combined share of the plotted colours,
    # placed slightly above the top of the stack.
    for i, x in enumerate(df_colors_top5_pivot.index):
        _stack_total = df_colors_top5_pivot.iloc[i].sum()
        plt.text(x, _stack_total + 0.05,
                 f'{_stack_total:.0%}',
                 ha='center', va='center', fontsize=12)

    plt.legend(loc='upper center', ncol=5, bbox_to_anchor=(0.5, 1.1))
    plt.ylim(0, 0.85)
    # Show the y axis as percentages (1.0 == 100%).
    plt.gca().yaxis.set_major_formatter(mtick.PercentFormatter(1.0))
    plt.xlabel("")
    plt.grid(axis='x')
    plt.xticks([1920, 1940, 1960, 1980, 2000],
               ['1920-1940', '1940-1960', '1960-1980', '1980-2000', '2000-'])

    # The last bucket is labelled as ending in 2022 rather than 2020.
    plus20 = 2022 if y == 2000 else y + 20
    plt.title(f'Proportion of top five colors in Archibald winning portraits during {y}-{plus20}, 20-year periods\n\n')
    plt.show()
# Summed colour share per (period, colour), largest first within each period.
_share_by_period = (
    df_colors
    .groupby(['year_vicennium', 'color_name'])
    .agg({'proportion2': 'sum'})
    .reset_index()
    .sort_values(['year_vicennium', 'proportion2'], ascending=[True, False])
)

# Every colour that ranks in the top five of at least one 20-year period.
top30cols = _share_by_period.groupby('year_vicennium').head(5)['color_name'].unique()

# Per-period share of just those colours (at most 100 rows kept per period).
top30cols_df = (
    df_colors[df_colors.color_name.isin(top30cols)]
    .groupby(['year_vicennium', 'color_name'])
    .agg({'proportion2': 'sum'})
    .reset_index()
    .sort_values(['year_vicennium', 'proportion2'], ascending=[True, False])
    .groupby('year_vicennium')
    .head(100)
)

fig, ax = plt.subplots(figsize=(8, 6))

# Each colour is drawn in its own colour: the palette is the list of colour
# names in their order of first appearance in the frame.
_palette = sns.color_palette(top30cols_df.color_name.unique(), len(top30cols))

# One line per colour across the periods ...
sns.lineplot(x='year_vicennium', y='proportion2', hue='color_name',
             data=top30cols_df, palette=_palette, ax=ax,
             alpha=0.6, linewidth=2.5)

# ... with round markers on the first (1920) and last (2000) period only.
_endpoints = top30cols_df[top30cols_df.year_vicennium.isin([1920, 2000])]
sns.scatterplot(x='year_vicennium', y='proportion2', hue='color_name',
                data=_endpoints, palette=_palette, ax=ax,
                s=50, marker='o', legend=False)

# Lines are labelled directly (below), so the legend is removed.
ax.legend().remove()
ax.set_title('Top colours over time')
ax.set_xlabel('')
ax.set_ylabel('')
plt.grid(axis='x', alpha=0.5)
plt.ylim(-.0275, 0.3)

# The blank ticks at 1905 and 2012 widen the x range for the side labels.
plt.xticks([1905, 1920, 1940, 1960, 1980, 2000, 2012],
           ['', '1920-1940', '1940-1960', '1960-1980', '1980-2000', '2000-', ''])

# Hand-placed colour-name labels: (text, anchor in data coordinates, colour).
_colour_labels = [
    ('navy', (1913, 0.257), 'navy'),
    ('midnight\nblue', (1913, 0.14), 'midnightblue'),
    ('maroon', (1913, 0.12), 'maroon'),
    ('saddle\nbrown', (1913, 0.07), 'saddlebrown'),
    ('black', (1913, 0.055), 'black'),
    ('olive', (2006, 0.015), 'olive'),
    ('dark\ngreen', (1913, -0.0275), 'darkgreen'),
    ('peru', (1913, 0.01), 'peru'),
    ('tan', (1913, 0.027), 'tan'),
    ('sienna', (1913, -.0025), 'sienna'),
    ('thistle', (2006.5, 0.0325), 'thistle'),
    ('dark\nslate\ngray', (2006.5, -0.025), 'darkslategray'),
]
for _text, _anchor, _text_colour in _colour_labels:
    ax.annotate(_text, _anchor, textcoords="offset points", xytext=(0, 10),
                ha='center', size=12, color=_text_colour)

plt.show()
Brightness over time¶
# def brightness( im_file ):
# im = Image.open(im_file)
# stat = Stat(im)
# r,g,b = stat.mean
# return math.sqrt(0.299*(r**2) + 0.587*(g**2) + 0.114*(b**2))
# onlyfiles = [f for f in listdir('./images/ArchibaldWinners') if isfile(join('./images/ArchibaldWinners', f))]
# # sort image files in decade dictionary
# images_df = pd.DataFrame(onlyfiles)
# images_df['year'] = images_df[0].apply(lambda x: int(x.split('_')[0]))
# images_df.loc[images_df[0] == '1990_SID78808M.jpg.641x900_q85.jpg','year'] = 1991
# images_df['decade'] = [ int(np.floor(int(year)/10) * 10)
# for year in np.array(images_df["year"])]
# images_df['brightness'] = images_df[0].apply(lambda x: brightness('./images/ArchibaldWinners/' + x))
# # create figure
# fig = plt.figure(figsize=(14, 8))
# ax = plt.axes()
# peaks = images_df[images_df['year'].isin([1921,1930,1936,1942,1947,1956,
# 1965, 1966,1978,1989,
# 2001,2002,2006,2014,2015,2022])]
# ax.plot(images_df.sort_values('year')['year'],
# images_df.sort_values('year')['brightness'])
# ax.plot(peaks.sort_values('year')['year'],
# peaks.sort_values('year')['brightness'], "o", color='#1f77b4')
# # Draw image
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1936].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1926,136,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1942].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1935,163,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1956].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1948,165,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/'+ \
# images_df[images_df.year == 1966].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1961,165,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1989].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1982,199,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1978].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1970,195,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 2002].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1994,210,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 2014].iloc[0][0], format='jpg')
# axin = ax.inset_axes([2007,205,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1921].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1906,-30,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 2022].iloc[0][0], format='jpg')
# axin = ax.inset_axes([2021,40,15,65],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1930].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1922,-46.5,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1947].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1940,-35,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 1965].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1958,-30,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 2001].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1991,-20,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 2006].iloc[0][0], format='jpg')
# axin = ax.inset_axes([1999.5,-35,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# arr_image = plt.imread('./images/ArchibaldWinners/' + \
# images_df[images_df.year == 2015].iloc[0][0], format='jpg')
# axin = ax.inset_axes([2009.5,-35,15,55],transform=ax.transData) # create new inset axes in data coordinates
# axin.imshow(arr_image)
# axin.axis('off')
# for tick in ax.xaxis.get_major_ticks(): tick.label1.set_fontsize(14)
# for tick in ax.yaxis.get_major_ticks(): tick.label1.set_fontsize(14)
# plt.title('Brightness over time, Archibald Winners', size=18)
# ax.set_ylim([-49.5, 280])
# ax.set_xlim([1905, 2035])
# plt.show()
# Display the pre-rendered brightness figure from disk. The bare `Image(...)`
# expression must stay the last statement of the cell: its value is what the
# notebook renders.
# NOTE(review): this import rebinds `Image`, which the first cell bound to
# PIL's Image module (`from PIL import Image`) — the commented-out
# `brightness()` helper above calls `Image.open`, so re-enabling it after this
# cell has run would need the PIL binding restored.
from IPython.display import Image
Image(filename='images/Brightness_python.png')
Participation¶
archies
YEAR | WINNER | GENDER | DOB | DOD | Unnamed: 5 | PORTRAIT TITLE | Sitter | DOB.1 | Sitter Age | ... | PORTRAIT GENDER | PORTRAIT OCC (Copy/Paste) | OCC. CATEGORY (1) | OCC. CATEGORY (2) | ANZSCO_1 | ANZSCO_2 | Comments | year_vicennium | winning_age | count | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1921 | W B McInnes | Male | 1889.0 | 1939.0 | 32 | Desbrowe Annear | Harold Desbrowe-Annear | 1865.0 | 56.0 | ... | Male | NaN | Architect | Architect | Design, Engineering, Science and Transport Pro... | Professionals | With his (McInnes) wife, fellow artist Violet ... | 1920 | 32.0 | 1 |
1 | 1922 | W B McInnes | Male | 1889.0 | 1939.0 | 33 | Professor Harrison Moore | William Harrison Moore | 1867.0 | 55.0 | ... | Male | constitutional lawyer and dean of the law facu... | Professor | Professor | Education Professionals | Professionals | New rules were added to the competition this y... | 1920 | 33.0 | 2 |
2 | 1923 | W B McInnes | Male | 1889.0 | 1939.0 | 34 | Portrait of a lady | Violet McInnes | 1892.0 | 31.0 | ... | Female | wife, Violet McInnes | Wife | Person | NaN | NaN | WB McInnes’s Portrait of a lady – his third wi... | 1920 | 34.0 | 3 |
3 | 1924 | W B McInnes | Male | 1889.0 | 1939.0 | 35 | Miss Collins | Gladys Neville Collins | 1890.0 | 34.0 | ... | Female | socialite, daughter of Joseph Thomas Collins | Socialite | Person | NaN | NaN | The first known Archibald portrait of an Indig... | 1920 | 35.0 | 4 |
4 | 1925 | John Longstaff | Male | 1861.0 | 1941.0 | 64 | Maurice Moscovitch | Maurice Moscovitch | 1871.0 | 54.0 | ... | Male | Russian-born actor | Actor | Actor | Arts and Media Professionals | Professionals | For the first time, the subject of the winning... | 1920 | 64.0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
97 | 2018 | Yvette Coppersmith | Female | 1980.0 | NaN | 38 | Self-portrait, after George Lambert | Yvette Coppersmith | 1980.0 | 38.0 | ... | Female | NaN | Self Portrait | Artist | Arts and Media Professionals | Professionals | Yvette Coppersmith became the tenth woman to w... | 2000 | 38.0 | 1 |
98 | 2019 | Tony Costa | Male | 1955.0 | NaN | 64 | Lindy Lee | Lindy Lee | 1954.0 | 65.0 | ... | Female | artist and a Zen Buddhist | Artist | Artist | Arts and Media Professionals | Professionals | A year for the record books. There were more e... | 2000 | 64.0 | 1 |
99 | 2020 | Vincent Namatjira | Male | 1983.0 | NaN | 37 | Stand strong for who you are | Adam Goodes | 1980.0 | 40.0 | ... | Male | Adam Goodes | Sports | Sports | Sports and Personal Service Workers | Community and Personal Service Workers | Records were broken again. The number of entri... | 2000 | 37.0 | 1 |
100 | 2021 | Peter Wegner | Male | 1953.0 | NaN | 68 | Portrait of Guy Warren at 100 | Guy Warren | 1921.0 | 100.0 | ... | Male | artist Guy Warren | Artist | Artist | Arts and Media Professionals | Professionals | For the first time, there is gender parity for... | 2000 | 68.0 | 1 |
101 | 2022 | Blak Douglas | Male | 1970.0 | NaN | 52 | Moby Dickens | Karla Dickens | 1967.0 | 55.0 | ... | Female | Wiradjuri artist | Artist | Artist | Arts and Media Professionals | Professionals | This year saw the highest known number of entr... | 2000 | 52.0 | 1 |
102 rows × 21 columns
Participation over time¶
# Yearly entry counts: the code below reads the 'Year', 'Entries' and
# 'Selected' columns.
no_participants = pd.read_csv('data/no_participants_new.csv')

fig = plt.figure(figsize=(12, 8))
ax = plt.axes()

# Sort chronologically once, then draw one line per series; the legend label
# is the column name itself.
_by_year = no_participants.sort_values('Year')
for _series in ('Entries', 'Selected'):
    ax.plot(_by_year['Year'], _by_year[_series], label=_series)

# Sponsor-period shading, currently switched off for this chart:
# plt.axvspan(1981, 1986, alpha=0.1, color='orange')
# plt.text(1982.5, 1060, 'Katies', ha='center', va='center',size=12, rotation=90)
# plt.axvspan(1986, 1988, alpha=0.1, color='yellow')
# plt.text(1987.25, 1000, 'Grace Brothers', ha='center', va='center',size=12, rotation=90)
# plt.axvspan(1988, 1992, alpha=0.1, color='green')
# plt.text(1989.5, 1000, 'Coles Myer Ltd', ha='center', va='center',size=12, rotation=90)
# plt.axvspan(1992, 2006, alpha=0.1, color='red')
# plt.text(1994.5, 890, 'State Bank/ Colonial Group', ha='center', va='center',
# size=12, rotation=90)
# plt.axvspan(2006, 2009, alpha=0.1, color='green')
# plt.text(2007.5, 1080, 'Myer', ha='center', va='center',size=12,rotation=90)
# plt.axvspan(2009, 2023, alpha=0.1, color='blue')
# plt.text(2010.5, 1080, 'ANZ', ha='center', va='center',size=12,rotation=90)

plt.title('Number of Entries and Selected, Archibald Prize')
plt.legend()
plt.show()
# from IPython.display import Image
# Image(filename='images/participationrates_python.png')
Prize Money¶
# Prize money table; its first CSV column becomes the index, which is used as
# the year axis below.
prize_money = pd.read_csv('data/Archibald_PrizeMoney2.csv', index_col=0)
plt.figure(figsize=(12, 8))

# Year (index) against the AUD-equivalent prize value.
x, y = prize_money.index, prize_money['AUD_Equivalent']
plt.plot(x, y)

plt.xlabel('')
plt.title('Archibald prize money by year', size=16, pad=10, loc='left')

# Shaded sponsor/partner periods. Sponsor names are kept as comments only:
# this overview chart draws the bands without text labels.
_sponsor_spans = [
    (1981, 1986, 'orange'),  # Katies
    (1986, 1988, 'yellow'),  # Grace Brothers
    (1988, 1992, 'green'),   # Coles Myer Ltd
    (1992, 2006, 'red'),     # State Bank/ Colonial Group
    (2006, 2009, 'green'),   # Myer
    (2009, 2023, 'blue'),    # ANZ
]
for _start, _end, _shade in _sponsor_spans:
    plt.axvspan(_start, _end, alpha=0.1, color=_shade)

# y ticks rendered as money, e.g. "$50,000 AUD".
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('${x:,.0f} AUD'))
plt.yticks(size=14)
plt.xticks(size=14)
plt.ylim(0, 110000)
plt.xlim(1915, 2023)
plt.grid(axis='y')

# Write the figure to disk before showing it.
plt.savefig('prize_money.png', dpi=330, bbox_inches='tight')
plt.show()
plt.figure(figsize=(16, 8))

# Two short windows (1965-1967 and 1971-1973) that get a dashed overlay —
# presumably years with gaps in the prize-money record; confirm against the CSV.
missing_cond = (1964 < prize_money.index) & (prize_money.index < 1968)
missing_cond2 = (1970 < prize_money.index) & (prize_money.index < 1974)

# Pre-sponsor era: every year before 1981.
_pre_sponsor = prize_money[prize_money.index < 1981]
x = _pre_sponsor.index
x2 = prize_money[missing_cond].index
x3 = prize_money[missing_cond2].index

y = _pre_sponsor['AUD_Equivalent']
# Inside each window, missing values are forward-filled from the previous year
# in that window.
y2 = prize_money[missing_cond]['AUD_Equivalent'].ffill()
y3 = prize_money[missing_cond2]['AUD_Equivalent'].ffill()

# Solid main series, then the dashed overlays for the two windows.
plt.plot(x, y)
for _gap_x, _gap_y in ((x2, y2), (x3, y3)):
    plt.plot(_gap_x, _gap_y, linestyle='dashed', color='steelblue', alpha=0.5)

plt.xlabel('')
plt.title('Archibald Prize Money by Year, Before Official Sponsors (1921-1980) ', size=22)
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('${x:,.0f} AUD'))

# Historical context labels: (x, y, text, horizontal alignment).
_event_labels = [
    (1930, 1250, 'GREAT DEPRESSION', 'left'),
    (1939, 1150, 'WORLD WAR II', 'left'),
    (1964, 1225, 'AUSTRALIAN DOLLAR INTRODUCED', 'left'),
    (1969, 3900, 'DONATION OF $2000 FROM THE BICENTENARY CELEBRATIONS CITIZENS’ COMMITTEE', 'right'),
]
for _label_x, _label_y, _label_text, _align in _event_labels:
    plt.text(_label_x, _label_y, _label_text, ha=_align, va='center', size=14)

plt.yticks(size=14)
plt.xticks(size=14)
plt.ylim(1, 4250)
plt.show()
# Re-read the entries table, this time using the first CSV column as the index.
no_participants = pd.read_csv('data/no_participants_new.csv', index_col=0)
plt.figure(figsize=(16, 8))

# Two short windows (1965-1967 and 1971-1973) that get a dashed overlay.
missing_cond = (1964 < prize_money.index) & (prize_money.index < 1968)
missing_cond2 = (1970 < prize_money.index) & (prize_money.index < 1974)

# Pre-sponsor era (before 1981) slices of both tables.
_money_pre = prize_money[prize_money.index < 1981]
_entries_pre = no_participants[no_participants.Year < 1981]

x = _money_pre.index
x2 = prize_money[missing_cond].index
x3 = prize_money[missing_cond2].index
x4 = _entries_pre['Year']

y = _money_pre['AUD_Equivalent']
y2 = prize_money[missing_cond]['AUD_Equivalent'].ffill()
y3 = prize_money[missing_cond2]['AUD_Equivalent'].ffill()
y4 = _entries_pre['Entries']

# --- top panel: prize money ---
ax = plt.subplot(2, 1, 1)
ax.plot(x, y)
for _gap_x, _gap_y in ((x2, y2), (x3, y3)):
    ax.plot(_gap_x, _gap_y, linestyle='dashed', color='steelblue', alpha=0.5)

# Join the two tables on whatever column names they share (no explicit key is
# given) and correlate entries with prize money, rounded to two decimals.
beforesponsors = pd.merge(_entries_pre, _money_pre.reset_index())
cor = beforesponsors['Entries'].corr(beforesponsors['AUD_Equivalent']).round(2)

plt.xlabel('')
plt.title(f'Archibald Prize Money and Number of Entries by Year, Before Official Sponsors (1921-1980), Corr: {cor} ', size=22)
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('${x:,.0f} AUD'))
plt.yticks(size=14)
plt.xticks(size=14)
plt.ylim(1, 4250)
plt.grid(axis='x')

# --- bottom panel: number of entries ---
ax2 = plt.subplot(2, 1, 2)
ax2.plot(x4, y4, color='tab:orange')
plt.yticks(size=14)
plt.xticks(size=14)
plt.ylim(1, 445)
plt.grid(axis='x')

plt.show()
plt.figure(figsize=(16, 8))

# Sponsor/partner era: 1981 onwards.
_sponsor_era = prize_money[prize_money.index >= 1981]
x = _sponsor_era.index
y = _sponsor_era['AUD_Equivalent']
plt.plot(x, y)

plt.xlabel('')
plt.title('Archibald Prize Money by Year, Sponsors/Partners Era (1981-) ', size=22)

# One shaded band per sponsor with a vertical name label inside it:
# (start, end, band colour, label, label x, label y).
_sponsor_periods = [
    (1980, 1986, 'orange', 'Katies', 1980.75, 103500),
    (1986, 1988, 'yellow', 'Grace Brothers', 1986.5, 96000),
    (1988, 1992, 'green', 'Coles Myer Ltd', 1988.5, 96000),
    (1992, 2006, 'red', 'State Bank/ Colonial Group', 1993, 96000),
    (2006, 2009, 'green', 'Myer', 2006.5, 105000),
    (2009, 2023, 'blue', 'ANZ', 2009.5, 105000),
]
for _start, _end, _shade, _name, _name_x, _name_y in _sponsor_periods:
    plt.axvspan(_start, _end, alpha=0.1, color=_shade)
    plt.text(_name_x, _name_y, _name, ha='center', va='center',
             size=14, rotation=90)

# y ticks rendered as money, e.g. "$50,000 AUD".
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('${x:,.0f} AUD'))
plt.yticks(size=14)
plt.xticks(size=14)
plt.ylim(0, 110000)
# The axis starts just after 1980, so the first band (drawn from 1980) is
# clipped at the left edge.
plt.xlim(1980.1, 2023)
plt.show()
plt.figure(figsize=(16, 8))

# Sponsor/partner era (1981 onwards) slices of both tables.
x = prize_money[prize_money.index >= 1981].index
x2 = no_participants[no_participants.Year >= 1981]['Year']
y = prize_money[prize_money.index >= 1981]['AUD_Equivalent']
y2 = no_participants[no_participants.Year >= 1981]['Entries']

# Sponsor periods shaded identically on both panels: (start, end, colour).
_sponsor_spans = [
    (1980, 1986, 'orange'),
    (1986, 1988, 'yellow'),
    (1988, 1992, 'green'),
    (1992, 2006, 'red'),
    (2006, 2009, 'green'),
    (2009, 2023, 'blue'),
]


def _shade_sponsor_periods(axis):
    """Draw the faint sponsor-period bands on *axis*."""
    for _start, _end, _shade in _sponsor_spans:
        axis.axvspan(_start, _end, alpha=0.05, color=_shade)


# NOTE: the name is reused from the pre-sponsor cell; here the frame holds the
# sponsor-era rows (Year >= 1981). The merge key is whatever column names the
# two frames share (none is given explicitly).
beforesponsors = pd.merge(no_participants[no_participants.Year >= 1981],
                          prize_money[prize_money.index >= 1981].reset_index())
cor = beforesponsors['Entries'].corr(beforesponsors['AUD_Equivalent']).round(2)

# --- top panel: prize money ---
ax = plt.subplot(2, 1, 1)
ax.plot(x, y, lw=2, label='Prize Money')
_shade_sponsor_periods(ax)
plt.ylim(0, 110000)
plt.xlim(1980.1, 2023)
plt.xlabel('')
plt.title(f'Archibald prize money and number of entries by year,\nSponsors/Partners era (1981-), Corr: {cor} ', size=22)
# y ticks rendered as money, e.g. "$50,000 AUD".
plt.gca().yaxis.set_major_formatter(StrMethodFormatter('${x:,.0f} AUD'))
# Tick label sizes are set once, after the axis limits are final.
plt.yticks(size=14)
plt.xticks(size=14)
plt.grid(axis='x')
plt.legend(title='', loc='upper left', fontsize=14)

# --- bottom panel: number of entries ---
ax2 = plt.subplot(2, 1, 2)
ax2.plot(x2, y2, color='tab:orange', lw=2, label='Number of entries')
_shade_sponsor_periods(ax2)
plt.yticks(size=14)
plt.xticks(size=14)
plt.ylim(0, 1190)
plt.grid(axis='x')
plt.xlim(1980.1, 2023)
plt.legend(title='', loc='upper left', fontsize=14)

# save figure
# plt.savefig('correlation.png', dpi=330, bbox_inches='tight')
plt.show()
Participant characteristics¶
# Print one-row summary statistics (count, mean, std, quartiles, ...) for, in
# order: Archibald start age (column 1), Archibald end age (column 2) and
# overall participation duration (column 'diff').
for _column in (1, 2, 'diff'):
    _summary_row = pd.DataFrame(artist_stats[_column].describe()).T
    print(_summary_row, '\n')
count mean std min 25% 50% 75% max
1 62.0 38.032258 10.850652 19.0 31.0 35.0 45.0 64.0
count mean std min 25% 50% 75% max
2 62.0 59.209677 12.787208 33.0 48.0 61.0 68.75 81.0
count mean std min 25% 50% 75% max
diff 62.0 21.177419 14.801998 3.0 11.0 16.5 31.75 62.0
Archibald Prize participation trajectory¶
# One small panel per artist: transpose so each column of `tt` is one series
# (its name is used as the panel title) and the index becomes the x axis.
tt = artist_df.T
fig, axes = plt.subplots(tt.shape[1], 1,
                         figsize=(9, tt.shape[1] * 1.25),
                         sharex=True)
# With a single column plt.subplots returns a bare Axes rather than an array;
# normalise so `.flat` below always works. No effect when there are 2+ panels.
axes = np.atleast_1d(axes)

# Background bands along x: (start, end, alpha), strongest in the middle.
_bands = [(-1, 20, 0.01), (20, 40, 0.025), (40, 60, 0.035),
          (60, 80, 0.025), (80, 111, 0.01)]

for idx, (col, ax) in enumerate(zip(tt.columns, axes.flat)):
    # Alternate the band colour between consecutive panels.
    colour = 'orange' if idx % 2 else 'green'
    # The first panel is drawn on a twin axis so its x ticks appear on top.
    if idx == 0:
        ax = ax.twiny()
    tt[col].plot(ax=ax, rot=0)

    # Dashed red marker at the first position where the series equals 3
    # (NOTE(review): 3 presumably encodes a win — confirm where artist_df is
    # built). Series that never reach 3 simply get no marker instead of
    # raising IndexError.
    _hits = tt[col][tt[col] == 3].index
    if len(_hits) > 0:
        markthis = _hits[0]
        ax.axvline(markthis, color='r', linestyle='--', lw=1, alpha=0.7)

    ax.set_title(col, x=0.115, y=0.6, size=8.5)
    ax.set_ylim(0, 3.5)
    ax.set_xlim(-1, 111)
    for _start, _end, _alpha in _bands:
        ax.axvspan(_start, _end, alpha=_alpha, color=colour)
Who is in the portrait?¶
archies['ANZSCO_1'].value_counts(normalize=True)
Arts and Media Professionals 0.648352
Chief Executives, General Managers and Legislators 0.142857
Design, Engineering, Science and Transport Professionals 0.043956
Education Professionals 0.032967
Health Professionals 0.032967
Legal, Social and Welfare Professionals 0.032967
Protective Service Workers 0.021978
Sports and Personal Service Workers 0.021978
Hospitality, Retail and Service Managers 0.021978
Name: ANZSCO_1, dtype: float64
from matplotlib.pyplot import figure
from textwrap import wrap

# decade of each record's YEAR, e.g. 1987 -> 1980
archies['Decade'] = [int(np.floor(int(year) / 10) * 10)
                     for year in np.array(archies["YEAR"])]

# Label rows that have no ANZSCO_1 group. The result is assigned back to the
# column: calling fillna(inplace=True) on `archies['ANZSCO_1']` is a chained
# in-place operation, which pandas warns about and which does not update
# `archies` once copy-on-write is enabled.
archies['ANZSCO_1'] = archies['ANZSCO_1'].fillna('Uncategorised')

# ANZSCO_1 group by gender; group names are wrapped at 30 characters so that
# long names break onto new lines in the tick labels
wrapped_groups = ['\n'.join(wrap(line, 30)) for line in archies['ANZSCO_1']]
t1 = pd.crosstab(wrapped_groups, archies['GENDER'])
t1['total'] = t1['Female'] + t1['Male']
t1.columns = ['Female subjects', 'Male subjects', 'total']

# plot horizontal stacked bar chart, groups ordered by their total count
# ('total' is only used for sorting and is dropped before plotting)
t1.sort_values('total').drop('total', axis=1).plot.barh(
    stacked=True, figsize=(10, 6), color=['#EC7E45', '#4C72B0'])

# remove y-axis title
plt.ylabel('')

# increase axis tick size
plt.yticks(size=12)
plt.xticks(size=12)

# white vertical line at every integer x from 1 to 59, drawn over the bars
# (presumably to split each bar into unit-sized segments)
for x in range(1, 60, 1):
    plt.axvline(x, color='white', linestyle='-', lw=1.25, alpha=0.75)

plt.legend(ncol=1, title='', fontsize=12, title_fontsize=10, loc='lower right',
           facecolor='white', edgecolor='white')
plt.title('Subjects by ANZSCO sub-major group', size=14, pad=10)

# add grid lines
plt.grid(axis='x')

# save figure
# plt.savefig('subject_by_anzsco.png', dpi=330, bbox_inches='tight')

plt.show()
from matplotlib.pyplot import figure

# decade of each record's YEAR, e.g. 1987 -> 1980
archies['Decade'] = [int(np.floor(int(year) / 10) * 10)
                     for year in np.array(archies["YEAR"])]

# Rows without an ANZSCO_1 group get the 'Uncategorised' label, which is the
# label the isin() filter below keeps as its own series. (Filling with any
# other label would silently fold those rows into 'Other'.) The result is
# assigned back because fillna(inplace=True) on a column selection does not
# update `archies` once pandas copy-on-write is enabled.
archies['ANZSCO_1'] = archies['ANZSCO_1'].fillna('Uncategorised')

# keep the named groups as they are and collapse every other group to 'Other'
kept_groups = ['Arts and Media Professionals',
               'Chief Executives, General Managers and Legislators',
               'Uncategorised']
archies['ANZSCO_1_v2'] = np.where(archies['ANZSCO_1'].isin(kept_groups),
                                  archies['ANZSCO_1'], 'Other')

t1 = pd.crosstab(archies['Decade'], archies['ANZSCO_1_v2'])

# Create the axes on the 8x6 figure and pass them to pandas. Calling figure()
# and then DataFrame.plot() without ax= leaves this figure empty, because
# pandas opens a second figure of its own.
fig = figure(figsize=(8, 6))
ax = fig.add_subplot(111)

# lines first, then markers on top (markers only: linewidth=0)
t1.plot(linewidth=2, alpha=0.6, ax=ax)
t1.plot(marker="o", markersize=6, alpha=0.9, ax=ax, linewidth=0,
        color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728'], legend=None)

# both plot calls register legend handles; keep only the first four so each
# series is listed once
handles, labels = ax.get_legend_handles_labels()
ax.legend(handles=handles[:4], labels=labels[:4], ncol=4, bbox_to_anchor=(0.5, 1.1),
          fontsize=10.5, loc='upper center', frameon=False)

# add vertical grid lines
plt.grid(axis='x', alpha=0.5)

# add faint vertical lines at every second decade
for decade in (1930, 1950, 1970, 1990, 2010):
    plt.axvline(decade, color='grey', linestyle='-', lw=1, alpha=0.2)

plt.title('Occupation of subjects by decade', size=14, pad=35)

# increase axis tick size
plt.yticks(size=12)
plt.xticks(size=11)

# increase x-axis title size
plt.xlabel('Decade', size=12)

# bracketed labels as (decade, text, y position of the text); the numbers are
# hard-coded here, not read from the data
self_portrait_labels = [
    (1930, '(2)', 5.5),
    (1950, '(1)', 5.5),
    (1970, '(2)', 4.75),
    (1980, '(1)', 6.5),
    (1990, '(3)', 8.5),
    (2000, '(2)', 10.5),
    (2010, '(2)', 8.5),
]
for decade, text, text_y in self_portrait_labels:
    plt.annotate(text, xy=(decade, 1), xytext=(decade, text_y),
                 size=12, color='tab:blue', ha='center')

# explanatory note in a white rounded box
plt.annotate('Self portrait occurrences are provided\nin brackets for relevant decades.',
             xy=(1918, 1), xytext=(1918, 10.25),
             size=12, color='grey', ha='left', alpha=0.7,
             bbox=dict(boxstyle='round', fc='white', ec='white', alpha=0.8))

# extend the y-axis so the labels and the note fit
plt.ylim(-1, 11.75)

plt.savefig('subject_occupation_by_decade.png', dpi=330, bbox_inches='tight')
plt.show()
<Figure size 800x600 with 0 Axes>
# number of records per decade for each value of the 'Self' column
pd.crosstab(archies['Decade'],archies['Self'])
Self | 0 | 1 |
---|---|---|
Decade | ||
1920 | 9 | 0 |
1930 | 8 | 2 |
1940 | 10 | 0 |
1950 | 9 | 1 |
1960 | 10 | 0 |
1970 | 8 | 2 |
1980 | 9 | 1 |
1990 | 7 | 3 |
2000 | 8 | 2 |
2010 | 8 | 2 |
2020 | 3 | 0 |
from matplotlib.pyplot import figure

# record counts per decade for each ANZSCO_2 group
t2 = pd.crosstab(archies['Decade'], archies['ANZSCO_2'])

# Draw on axes that belong to the 8x6 figure. Calling figure() and then
# DataFrame.plot() without ax= leaves this figure empty, because pandas
# opens a second figure of its own.
fig = figure(figsize=(8, 6))
t2.plot(marker="o", markersize=4, ax=fig.add_subplot(111))

# single-column legend anchored at the right-hand edge of the axes
plt.legend(ncol=1, bbox_to_anchor=(1, 0.7))
plt.show()
<Figure size 800x600 with 0 Axes>
# horizontal bar chart of value counts, one figure per ANZSCO column
for level in ('ANZSCO_1', 'ANZSCO_2'):
    counts = archies[level].value_counts()
    counts.plot(kind='barh')
    plt.show()
Online presence of recent winners¶
2018: Yvette Coppersmith
2019: Tony Costa
2020: Vincent Namatjira
2021: Peter Wegner
2022: Blak Douglas
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# read data
googletrends = pd.read_csv('data/Last5Winners.csv')
sns.set(style='white', context='paper', rc={'figure.figsize':(12, 5)})


def _rows_for(month):
    """Return the rows of `googletrends` whose 'Month' equals *month*."""
    return googletrends[googletrends['Month'] == month]


def _position_of(month):
    """Return the index label of the first row whose 'Month' equals *month*."""
    return _rows_for(month).index.values[0]


# One interest line per winner. The order of this list is the order the lines
# are drawn in, and therefore the order of the legend entries.
line_specs = [
    ('Tony Costa', 'tab:orange'),
    ('Blak Douglas', 'tab:blue'),
    ('Vincent Namatjira', 'tab:red'),
    ('Peter Wegner', 'tab:green'),
    ('Yvette Coppersmith', 'tab:purple'),
]
ax = None  # the first plot call creates the axes, the rest reuse them
for name, colour in line_specs:
    ax = googletrends.plot(x='Month', y=name, color=colour, linewidth=2,
                           zorder=1, ax=ax, alpha=0.7)

# Per-winner settings, ordered by the month that is highlighted:
#   name, colour, highlighted month,
#   amount subtracted from the marker's x position,
#   label text, amount subtracted from the row index before it is divided by
#   the number of rows to give the label's x position as an axes fraction,
#   first and last month of the shaded band
highlights = [
    ('Yvette Coppersmith', 'tab:purple', '2018-05', 0, "May\n'18", 0, '2018-03', '2018-07'),
    ('Tony Costa', 'tab:orange', '2019-05', 0, "May\n'19", 0.5, '2019-03', '2019-07'),
    ('Vincent Namatjira', 'tab:red', '2020-09', .0345, "Sept\n'20", 2, '2020-07', '2020-11'),
    ('Peter Wegner', 'tab:green', '2021-06', 0, "June\n'21", 3, '2021-04', '2021-08'),
    ('Blak Douglas', 'tab:blue', '2022-05', .025, "May\n'22", 3.75, '2022-03', '2022-07'),
]

# filled marker at each winner's value in the highlighted month, placed 0.5
# below that value
for name, colour, month, x_nudge, _, _, _, _ in highlights:
    month_rows = _rows_for(month)
    ax.scatter(month_rows.index.values[0] - x_nudge,
               month_rows[name].values[0] - .5,
               s=20, color=colour, zorder=2)

# source note near the top-left corner of the axes
plt.annotate('Source: Google Trends', xy=(0.0125, .925), xycoords='axes fraction',
             fontsize=12, color='#555555', zorder=2)

# month/year label for each highlighted month, positioned in axes fractions
n_rows = googletrends.shape[0]
for _, _, month, _, label, label_shift, _, _ in highlights:
    plt.annotate(label, xy=((_position_of(month) - label_shift) / n_rows, 0.91),
                 xycoords='axes fraction', fontsize=10, color='#555555', ha='center')

# lightly shaded band around each highlighted month, in the winner's colour
for _, colour, _, _, _, _, band_start, band_end in highlights:
    plt.axvspan(_position_of(band_start), _position_of(band_end),
                color=colour, alpha=0.025, zorder=3)

# axis cosmetics: no x label, horizontal grid lines drawn below the data
plt.xlabel('')
plt.grid(axis='y', alpha=0.5)
plt.gca().set_axisbelow(True)

# y ticks every 25 up to 100; the axis itself runs to 120, above the labels
plt.yticks(np.arange(0, 110, 25), fontsize=12)
plt.xticks(fontsize=12)
plt.ylim(0, 120)

plt.title('Google search term usage for the last 5 winners, 2013-2023\n\n', fontsize=16)
plt.legend(loc='upper center', fontsize=11.5, bbox_to_anchor=(0.5, 1.125), ncol=5)

# save figure
# plt.savefig('google_trends_last_five_winners.png', dpi=330, bbox_inches='tight')

plt.show()
# Numbers represent search interest relative to the highest point on the chart for the given region and time.
# A value of 100 is the peak popularity for the term. A value of 50 means that the term is half as popular.
# A score of 0 means that there was not enough data for this term.