diff --git a/DESCRIPTION b/DESCRIPTION index 1885690..03d8190 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -55,7 +55,10 @@ Suggests: gridExtra, vcd, MASS, - forcats + forcats, + emojifont, + waffle, + PieGlyph License: GPL LazyLoad: yes LazyData: yes diff --git a/man/Arbuthnot.Rd b/man/Arbuthnot.Rd index 0d935ed..8824c45 100644 --- a/man/Arbuthnot.Rd +++ b/man/Arbuthnot.Rd @@ -61,7 +61,7 @@ Campbell, R. B., Arbuthnot and the Human Sex Ratio (2001). \emph{Human Biology}, 73:4, 605-610. %\url{http://www.math.uni.edu/~campbell/arbuth.html} -Creighton, C. (1965). A History of Epidemics in Britain, 2nd edition, vol. 1 and 2. +Creighton, C. (1965). A History of Epidemics in Britain, 2nd edition, vol. 1 and 2. NY: Barnes and Noble. S. Zabell (1976). Arbuthnot, Heberden, and the \emph{Bills of Mortality}. @@ -78,5 +78,26 @@ lines(Arb.smooth$x, Arb.smooth$y, col="blue", lwd=2) -# plot the total christenings to observe the anomalie in 1704 +# plot the total christenings to observe the anomaly in 1704 with(Arbuthnot, plot(Year,Total, type='b', ylab="Total Christenings")) + +########################################################################### +# Challenge 03: Dr. John Russell +# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge03.R +########################################################################### + +if (require("PieGlyph")) { + require(ggplot2) + Arbuthnot |> + transform(total=`Males` + `Females`) |> + ggplot(aes(x=Year, y=total)) + + geom_pie_glyph(slices=c("Males","Females"), + color="black",radius=0.4) + + scale_fill_manual(values=c("lightblue","pink")) + + theme_minimal() + + theme(legend.position="bottom") + + labs(fill="Gender", + title="John Arbuthnot's Data on Male and Female Baptisms in London", + x="Year", + y="Total Baptisms") +} } \keyword{datasets} diff --git a/man/Cholera.Rd b/man/Cholera.Rd index 60d3187..8e04968 100644 --- a/man/Cholera.Rd +++ b/man/Cholera.Rd @@ -40,7 +40,7 @@ Farr's table. 
} \details{ The supply of \code{water} was classified as \dQuote{Thames, between Battersea and Waterloo Bridges} -(central London), +(central London), \dQuote{New River, Rivers Lea and Ravensbourne}, and \dQuote{Thames, at Kew and Hammersmith} (western London). The factor levels use abbreviations for these. @@ -49,7 +49,7 @@ The data frame is sorted by increasing elevation above the high water mark. } \source{ Bingham P., Verlander, N. Q., Cheal M. J. (2004). -John Snow, William Farr and the 1849 outbreak of cholera that affected London: +John Snow, William Farr and the 1849 outbreak of cholera that affected London: a reworking of the data highlights the importance of the water supply. \emph{Public Health}, 118(6), 387-394, Table 2. (The data was kindly supplied by Neville Verlander, including additional variables @@ -67,7 +67,7 @@ The relevant tables are at pages clii -- clvii. data(Cholera) # plot cholera deaths vs. elevation -plot(cholera_drate ~ elevation, data=Cholera, +plot(cholera_drate ~ elevation, data=Cholera, pch=16, cex.lab=1.2, cex=1.2, xlab="Elevation above high water mark (ft)", ylab="Deaths from cholera in 1849 per 10,000") @@ -81,15 +81,15 @@ lines(mort ~ elev, lwd=2, col="blue") if(require("car", quietly=TRUE)) { # show separate regression lines for each water supply - scatterplot(cholera_drate ~ elevation | water, data=Cholera, + scatterplot(cholera_drate ~ elevation | water, data=Cholera, smooth=FALSE, pch=15:17, id=list(n=2, labels=sub(",.*", "", Cholera$district)), col=c("red", "darkgreen", "blue"), legend=list(coords="topleft", title="Water supply"), xlab="Elevation above high water mark (ft)", ylab="Deaths from cholera in 1849 per 10,000") - - scatterplot(cholera_drate ~ poor_rate | water, data=Cholera, + + scatterplot(cholera_drate ~ poor_rate | water, data=Cholera, smooth=FALSE, pch=15:17, id=list(n=2, labels=sub(",.*", "", Cholera$district)), col=c("red", "darkgreen", "blue"), @@ -99,7 +99,7 @@ if(require("car", quietly=TRUE)) { } # fit a 
logistic regression model a la Bingham etal. -fit <- glm( cbind(cholera_deaths, popn) ~ +fit <- glm( cbind(cholera_deaths, popn - cholera_deaths) ~ water + elevation + poor_rate + annual_deaths + pop_dens + persons_house, data=Cholera, family=binomial) @@ -113,5 +113,50 @@ if (require(effects)) { plot(eff) } +########################################################################### +# Challenge 01: Dr. John Russell +# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge01.R +# Hint: It works better if emojifont package is installed! +########################################################################### + +if (require("emojifont")) { + load.fontawesome() +} + +if (require("ggplot2")) { +plot1 <- Cholera |> + ggplot(aes(x=elevation, y=cholera_drate,color=water)) + + geom_point() + + theme_bw() + + labs(y="Cholera Death Rate per 10000 citizens (log)", + x="Elevation (log)", + color="Water Source") + + theme(legend.position="bottom") + + scale_y_log10() + + scale_x_log10() + + scale_color_brewer(palette="Dark2") + +plot1 +} +if (require("waffle")) { + day1 <- Cholera |> + transform(newcholeradeaths = cholera_deaths/40) |> + sort_by(~water) |> + ggplot(aes(fill=water,values=newcholeradeaths)) + + geom_waffle(radius=grid::unit(0.4,"npc"), + size=.25) + + theme_void() + + labs(title="Deaths from Cholera in London in 1849", + subtitle="40 Deaths per Square", + caption=paste("Data from William Farr's initial report, where he concluded", + "elevation\nwas the factor in the spread of cholera"), + fill="Water Source") + + scale_fill_brewer(palette="Dark2") + + theme(legend.position="bottom", + panel.background=element_rect(color="white")) + +day1 +} + } \keyword{datasets} diff --git a/man/Galton.Rd b/man/Galton.Rd index 3118155..bd91991 100644 --- a/man/Galton.Rd +++ b/man/Galton.Rd @@ -35,8 +35,8 @@ Galton, F. (1886). Regression Towards Mediocrity in Hereditary Stature \emph{Journal of the Anthropological Institute}, 15, 246-263 } \references{ -Friendly, M. & Denis, D. (2005). 
The early origins and development of the scatterplot. -\emph{Journal of the History of the Behavioral Sciences}, +Friendly, M. & Denis, D. (2005). The early origins and development of the scatterplot. +\emph{Journal of the History of the Behavioral Sciences}, 41, 103-130. Galton, F. (1869). \emph{Hereditary Genius: An Inquiry into its Laws and Consequences}. @@ -46,13 +46,13 @@ Hanley, J. A. (2004). "Transmuting" Women into Men: Galton's Family Data on Huma \emph{The American Statistician}, 58, 237-243. See: \url{http://www.medicine.mcgill.ca/epidemiology/hanley/galton/} for source materials. -Stigler, S. M. (1986). +Stigler, S. M. (1986). \emph{The History of Statistics: The Measurement of Uncertainty before 1900}. Cambridge, MA: Harvard University Press, Table 8.1 -Wachsmuth, A. W., Wilkinson L., Dallal G. E. (2003). -Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data. -\emph{The American Statistician}, 57, 190-192. +Wachsmuth, A. W., Wilkinson L., Dallal G. E. (2003). +Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data. +\emph{The American Statistician}, 57, 190-192. 
%\url{http://staff.ustc.edu.cn/~zwp/teach/Reg/galton.pdf} \doi{10.1198/0003130031874} @@ -60,7 +60,7 @@ Galton's bend: A previously undiscovered nonlinearity in Galton's family stature \seealso{ - \code{link{GaltonFamilies}}, + \code{\link{GaltonFamilies}}, \code{\link{PearsonLee}}, - \code{galton} in the \pkg{psych} % \code{\link[psych]{galton}} + \code{galton} in the \pkg{psych} % \code{\link[psych]{galton}} } \examples{ @@ -76,7 +76,7 @@ data(Galton) # sunflower plot with regression line and data ellipses and lowess smooth ########################################################################### -with(Galton, +with(Galton, { sunflowerplot(parent,child, xlim=c(62,74), ylim=c(62,74)) reg <- lm(child ~ parent) @@ -87,5 +87,25 @@ with(Galton, } }) +########################################################################### +# Challenge 02: Dr. John Russell +# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge02.R +########################################################################### + + +if (require("ggplot2")) { +Galton |> + ggplot(aes(x=parent,y=child)) + + geom_point(position="jitter") + + geom_density2d_filled(alpha=.6,show.legend=FALSE,bins=9) + + theme_minimal() + + geom_smooth(method="loess",se=FALSE,color="red") + + geom_smooth(method="lm",se=FALSE) + + labs(title="Galton's Height Data", + x="Parent Height (inches)", + y="Child Height (inches)") + + scale_fill_brewer(type="seq") +} + } \keyword{datasets} diff --git a/man/PearsonLee.Rd b/man/PearsonLee.Rd index cef9912..fca88f7 100644 --- a/man/PearsonLee.Rd +++ b/man/PearsonLee.Rd @@ -36,16 +36,16 @@ Pearson, K. and Lee, A. (1896). Mathematical contributions to the theory of evolution. On telegony in man, etc. \emph{Proceedings of the Royal Society of London}, 60 , 273-283. -Pearson, K. and Lee, A. (1903). +Pearson, K. and Lee, A. (1903). On the laws of inheritance in man: I. Inheritance of physical characters. \emph{Biometrika}, 2(4), 357-462. (Tables XXII, p. 415; XXV, p. 417; XXVIII, p. 419 and XXXI, p. 421.) 
} \references{ -Wachsmuth, A.W., Wilkinson L., Dallal G.E. (2003). -Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data. -\emph{The American Statistician}, 57, 190-192. +Wachsmuth, A.W., Wilkinson L., Dallal G.E. (2003). +Galton's bend: A previously undiscovered nonlinearity in Galton's family stature regression data. +\emph{The American Statistician}, 57, 190-192. %\url{http://staff.ustc.edu.cn/~zwp/teach/Reg/galton.pdf} \doi{10.1198/0003130031874} } @@ -58,13 +58,13 @@ Galton's bend: A previously undiscovered nonlinearity in Galton's family stature data(PearsonLee) str(PearsonLee) -with(PearsonLee, +with(PearsonLee, { lim <- c(55,80) xv <- seq(55,80, .5) sunflowerplot(parent,child, number=frequency, xlim=lim, ylim=lim, seg.col="gray", size=.1) abline(lm(child ~ parent, weights=frequency), col="blue", lwd=2) - lines(xv, predict(loess(child ~ parent, weights=frequency), data.frame(parent=xv)), + lines(xv, predict(loess(child ~ parent, weights=frequency), data.frame(parent=xv)), col="blue", lwd=2) # NB: dataEllipse doesn't take frequency into account if(require(car)) { @@ -106,5 +106,21 @@ ggplot(PearsonLee, aes(x = child, y = parent, weight=frequency)) + theme(legend.position = c(0.14, 0.885), legend.background = element_rect(fill = 'white')) +########################################################################### +# Challenge 02: Dr. John Russell +# github.com/drjohnrussell/30DayChartChallenge/blob/main/2025/Challenge02.R +########################################################################### + +PearsonLee |> + ggplot(aes(x=parent,y=child)) + + geom_point() + + theme_minimal() + + geom_smooth(aes(weight=frequency), method="loess", se=FALSE, color="red",linewidth=2) + + geom_smooth(aes(weight = frequency), method="lm", se=FALSE) + + facet_grid(par ~ chl) + + labs(title="Pearson and Lee's Data on Parent and Child", + x="Parent Height (inches)", + y="Child Height (inches)") + } \keyword{datasets}