* hist_overlay: overlay the histograms of one variable for the two groups
* defined by the -over()- variable, optionally on a log10 x-axis (-xlog-).
*
* Syntax:
*   hist_overlay varname [if] [in], over(varname)
*       [color1() color2() xline1() xline2() title() subtitle() xtitle()
*        ytitle() bin() width() start() verbose xlog name() saving() plusone
*        powers frequency density fraction addlabels addlabopts() twoway_options]
*
* Errors:
*   198  -over()- does not have exactly two levels in the sample,
*        -xscale(log)- was requested, or -xlog- was requested without
*        -niceloglabels- being installed
*   2000 no observations remain for plotting
*
* Returns:
*   r(start)     lower bound of the first bin actually used
*   r(barwidth)  bin width actually used
*   r(bindata)   matrix with columns bin_ht1 bin_cntr1 bin_ht2 bin_cntr2
program define hist_overlay, rclass sortpreserve
*! version 1.32 22Mar2023
    syntax varname [if] [in], over(varname) ///
        [color1(string) color2(string) ///
        xline1(string) xline2(string) ///
        title(passthru) subtitle(passthru) ///
        XTItle(passthru) YTItle(passthru) ///
        bin(passthru) Width(passthru) start(passthru) ///
        /* BARWidth(passthru) */ Verbose xlog ///
        name(passthru) saving(passthru) plusone POWers FREQuency DENsity FRACtion ///
        ADDLabels ADDLABOPts(string) * ]

    version 13.1

    // Without -verbose-, silence the summary output; with it, also ask
    // -twoway__histogram_gen- to display its bin settings
    if "`verbose'"=="" {
        local shhh quietly
    }
    else {
        local display display
    }

    // Density is the default bar height
    if "`frequency'`density'`fraction'"=="" {
        local density density
    }

    // Reject -xscale(log)-; -xlog- is the supported route
    if strpos("`options'","xsc") {
        _parsexscale, `options'
    }

    marksample touse

    // Give an unlabeled variable its own name as label, unless the user
    // supplied -xtitle()-
    local vlbl : variable label `varlist'
    if trim("`vlbl'")=="" & "`xtitle'"=="" {
        lab var `varlist' "`varlist'"
    }

    tempvar y x1 d1 x2 d2

    // Identify two levels of the -over- variable
    qui levelsof `over' if `touse', clean
    local levels `r(levels)'
    local nlevels = wordcount("`levels'")
    if `nlevels'~=2 {
        di as err "The variable specified in the -over- option must have exactly two different values"
        exit 198
    }

    // Find the values, value labels and legend keys for the two levels
    local overtype : type `over'
    if substr("`overtype'",1,3)=="str" {
        foreach i of numlist 1 2 {
            local lvl`i' : word `i' of `levels'
            local lvl`i' `"`lvl`i''"'
            local lvl`i'lbl `lvl`i''
            local key`i' "`lvl`i'lbl'"
        }
    }
    else {
        foreach i of numlist 1 2 {
            local lvl`i' : word `i' of `levels'
            local lvl`i'lbl : label (`over') `lvl`i''
            if "`lvl`i'lbl'"=="" {
                local key`i' "`lvl`i''"
            }
            else {
                local key`i' "`lvl`i'lbl'"
            }
        }
    }

    // Construct the default legend.  With -addlabels- each bar plot is
    // followed by a scatter plot of labels, so the second bar plot is plot 3.
    if "`addlabels'" =="" {
        local mylegend legend(order(1 "`key1'" 2 "`key2'") )
    }
    else {
        local mylegend legend(order(1 "`key1'" 3 "`key2'") )
    }

    // Colors assigned to the two levels of -over-
    // (opacity suffixes such as %50 are used only from Stata 15 on)
    if c(stata_version)>=15 {
        if "`color1'"=="" {
            local color1 navy%50
            local lineclr1 navy
        }
        if "`color2'"=="" {
            local color2 red%50
            local lineclr2 red
        }
    }
    else {
        if "`color1'"=="" {
            local color1 none
            local lineclr1 black
        }
        if "`color2'"=="" {
            local color2 dimgray
            local lineclr2 black
        }
    }

    // Enable -xline1- and -xline2- suboptions
    foreach i of numlist 1 2 {
        if "`xline`i''"~="" {
            _parsexline `xline`i''
            local lines `r(lines)'
            local colopt `r(colopt)'
            local othopts `r(othopts)'
            if "`colopt'"=="" {
                local colopt lcolor(`lineclr`i'')
            }
            local xline`i' xline(`lines', `colopt' `othopts')
        }
    }

    if "`plusone'"~="" local plusone + 1

    // With -xlog-, check the prerequisite and screen out values whose log is
    // undefined BEFORE computing the axis bounds, so that the bounds, the
    // labels and the plotted variable all refer to the same observations.
    if "`xlog'"~="" {
        capt which niceloglabels
        if _rc~=0 {
            di as err "To use the -xlog- option, install Nick Cox's program, {search niceloglabels:niceloglabels}"
            exit 198
        }
        qui count if `touse' & `varlist' <= 0
        if `r(N)' > 0 {
            di as err "Variable `varlist' has `r(N)' observations less than or equal to zero." ///
                _n "With the -xlog- option, execution continues omitting these observations."
            // Keep only strictly positive values (the original condition
            // "<= 0" kept exactly the observations it meant to drop)
            local omit_le_zero & `varlist' > 0
        }
    }

    // Identify left and right bounds of bar graph
    sum `varlist' if `touse' `omit_le_zero', meanonly   // Across all values of the -over- variable
    if r(N)==0 {
        error 2000
    }
    local rawmax = `r(max)'
    local max = round(`rawmax')
    local log_max = log10(`max')    // Missing if `max' <=0

    // Round the minimum down to a "nice" value, never above the sample minimum
    local rawmin = `r(min)'
    local pwr10 = round(log10(abs(`rawmin')))-1
    local min = round(floor(10^abs(`pwr10')*`rawmin')/10^abs(`pwr10'),10^`pwr10')
    local min = min(`min',`rawmin')
    local log_min = log10(`min')    // Missing if `min' <=0

    // Let `y' variable be either the original variable or its log transform
    if "`xlog'"=="" {
        qui gen `y' = `varlist' `plusone' if `touse'
        local ylbl : variable label `varlist'
        label var `y' "`ylbl'"
        char `y'[varname] `varlist' `plusone'
        if "`start'"=="" {
            local start start(`min')
        }
    }
    else {
        // With -xlog- option, define "nice" labels for x-axis
        // NOTE(review): with -powers-, `lbls' presumably holds value/label
        // pairs; confirm that -foreach ... of numlist- accepts that content.
        qui niceloglabels `varlist' if `touse' `omit_le_zero', local(lbls) style(125) `powers'
        foreach x of numlist `lbls' {
            local lblswcommas `lblswcommas' `comma' `x'
            // Separator for every element after the first; without it the
            // list is not a valid argument list for max() and min() below
            local comma ,
        }
        local rangelbls = max(`lblswcommas',.,`max') - min(`min',.,`lblswcommas')
        local nlbls : word count `lbls'

        if "`start'"=="" {
            local start start(`log_min')
        }

        // Format the labels assigned to the log values
        _parsexlabel, `options'
        if "`r(xlblfrmt)'"~="" {    // Formatted by user
            local xlblfrmt `r(xlblfrmt)'
        }
        else {  // Default format: more decimals for narrower ranges
            local afterdec = ///
                cond(`rangelbls' > 3, 0, ///
                cond(`rangelbls' > 2, 0, ///
                cond(`rangelbls' > .5, 1, ///
                cond(`rangelbls' > .1, 2, ///
                cond(`rangelbls' > .01, 3, ///
                cond(`rangelbls' >.001, 4, 5))))))
            local beforedec = 12 - `afterdec'
            local xlblfrmt %`beforedec'.`afterdec'fc
        }

        // Without the -powers- option specified, map each of these
        // values in `lbls' into a duple, the first element of which is the log
        // of the value and the second of which is the value
        if "`powers'"=="" {
            local min : di `xlblfrmt' `min'
            local min = trim("`min'")
            local loglbls `log_min' "`min'"

            local i = 1
            while `i' <= `nlbls' {
                local val : word `i' of `lbls'
                local logval = log10(`val')
                local val : di `xlblfrmt' `val'
                local val = trim("`val'")
                local loglbls `loglbls' `logval' "`val'"
                local i = `i' + 1
            }

            local max : di `xlblfrmt' `max'
            local max = trim("`max'")
            local loglbls `loglbls' `log_max' "`max'"
        }
        // With the -powers- option, the local contains two words for each label.
        // Change the first of each of these duples into its log value
        else {
            local i = 1
            while `i' < `nlbls' {
                local val : word `i' of `lbls'      // 1st word of duple
                local logval = log10(`val')
                local i = `i' + 1
                local vallbl : word `i' of `lbls'   // 2nd word of duple
                local loglbls `loglbls' `logval' "`vallbl'"
                local i = `i' + 1
            }
        }
        local xlabel xlabel(`loglbls')

        // Convert input variable to log scale
        qui gen `y' = log10(`varlist' `plusone') if `touse' `omit_le_zero'
        local yloglbl : variable label `varlist'
        label var `y' "`yloglbl'"
        char `y'[varname] log10(`varlist'`plusone')
    }
    local varname : char `y'[varname]

    // Conditions selecting each of the two groups
    if substr("`overtype'",1,3)=="str" {
        local levelcond1 : di `"`over' == "`lvl1'""'
        local levelcond2 : di `"`over' == "`lvl2'""'
    }
    else {
        local levelcond1 `over' == `lvl1'
        local levelcond2 `over' == `lvl2'
    }

    // Create histogram plotting variables d1 and x1 for group 1
    `shhh' di _n as txt "For the first value of the variable -" as res "`over'" as txt "-, the summary statistics are:"
    `shhh' di _n as txt `"sum `varname' if touse & `levelcond1', det"'
    `shhh' sum `y' if `touse' & `levelcond1', det
    twoway__histogram_gen `y' if `touse' & `levelcond1', gen(`d1' `x1') ///
        `bin' `width' `start' `display' `frequency' `density' `fraction'
    char define `d1'[varname] BinHeight
    char define `x1'[varname] BinCenter

    // Reuse group 1's width and start for group 2 so that the bins coincide
    if "`width'"=="" {
        local width width(`r(width)')
        local start start(`r(start)')
        local bin
    }
    local barwidth barwidth(`r(width)')

    // Create histogram plotting variables d2 and x2 for group 2
    `shhh' di _n as txt "For the second value of the variable -" as res "`over'" as txt "-, the summary statistics are:"
    `shhh' di _n as txt `"sum `varname' if touse & `levelcond2', det"'
    `shhh' sum `y' if `touse' & `levelcond2', det
    twoway__histogram_gen `y' if `touse' & `levelcond2', gen(`d2' `x2') ///
        `bin' `width' `start' `display' `frequency' `density' `fraction'
    char define `d2'[varname] BinHeight
    char define `x2'[varname] BinCenter

    * Format the bin-height variables (the bin centers are left unformatted)
    foreach i of numlist 1 2 {
        foreach v in d /* x */ {
            sum ``v'`i'', meanonly
            local range`i' = abs(`r(max)' - `r(min)')
            local afterdec = ///
                cond(`range`i'' > 3, 0, ///
                cond(`range`i'' > 2, 0, ///
                cond(`range`i'' > .5, 1, ///
                cond(`range`i'' > .1, 2, ///
                cond(`range`i'' > .01, 3, ///
                cond(`range`i'' >.001, 4, 5))))))
            local beforedec = 12 - `afterdec'
            format %`beforedec'.`afterdec'f ``v'`i''
        }
    }

    if "`verbose'"~="" {
        di _n as txt "The bins and their contents are:"
        list `d1' `x1' `d2' `x2' if `x1'<. | `x2'<., subvarname nocompress
    }

    // Collect the bin heights and centers of both groups for r(bindata)
    tempname bindata
    mkmat `d1' `x1' `d2' `x2' if `x1'<. | `x2'<., matrix(`bindata')
    mat colnames `bindata' = bin_ht1 bin_cntr1 bin_ht2 bin_cntr2

    // Optional bar-height labels, drawn as marker-less scatter plots
    if "`addlabels'"~="" {
        local mlabgph1 (sc `d1' `x1', mlabel(`d1' ) msym(none) mlabpos(12) mlabcolor(black) `addlabopts')
        local mlabgph2 (sc `d2' `x2', mlabel(`d2' ) msym(none) mlabpos(12) mlabcolor(black) `addlabopts')
    }

    // Construct graph overlaying the two histograms
    twoway ///
        (bar `d1' `x1', `barwidth' lc(black) fcolor(`color1') `xline1' ) `mlabgph1' ///
        (bar `d2' `x2', `barwidth' lc(black) fcolor(`color2') `xline2' ) `mlabgph2' , ///
        plotregion(margin(zero)) `xlabel' `mylegend' ///
        `title' `subtitle' ///
        `ytitle' `xtitle' ///
        `name' `saving' `options'

    // Return the numeric contents of start() and barwidth(), plus the bin data
    tokenize "`start'", p("()")
    return local start = real("`3'")
    tokenize "`barwidth'", p("()")
    return local barwidth = real("`3'")
    return matrix bindata = `bindata'

end
/* End of main program */

* Exit with error 198 when the user's options request -xscale(log)-
prog define _parsexscale
    syntax [, XSCale(string) * ]

    if strpos("`xscale'","log") {
        di _n as err "The option -xscale(log)- is not supported." ///
           _n as err "Try -xlog- instead."
        exit 198
    }

end
/* End of program _parsexscale */

* Split the contents of -xline1()-/-xline2()- into the line positions
* r(lines), the lcolor() suboption r(colopt) and all other suboptions r(othopts)
prog define _parsexline, rclass
    syntax [anything] , [ lcolor(passthru) * ]

    return local lines `anything'
    return local colopt `lcolor'
    return local othopts `options'

end
/* End of program _parsexline */

* Allow -xlabel(,format(%9.3f))- to format the xaxis labels for the -xlog- option
* Returns r(isxlabel) ("isxlabel" when -xlabel()- was given) and r(xlblfrmt)
* (the contents of its format() suboption, empty when there is none)
prog define _parsexlabel, rclass
    syntax [, XLABel(string) * ]

    if `"`xlabel'"'=="" {
        local isxlabel
        local xlblfrmt
    }
    else {
        local isxlabel isxlabel
        // Everything after the first comma holds the suboptions
        local subopts = substr(`"`xlabel'"',strpos(`"`xlabel'"',",")+1,.)
        if strpos(`"`subopts'"',"format(") {
            // Extract the contents of the format() option
            local strtfrmt = strpos(`"`subopts'"',"format(")
            local xlblfrmt = substr(`"`subopts'"',`strtfrmt'+7,.)
            local xlblfrmt = substr(`"`xlblfrmt'"',1,strpos(`"`xlblfrmt'"',")")-1)
        }
        else {
            local xlblfrmt
        }
    }
    return local isxlabel `isxlabel'
    return local xlblfrmt `xlblfrmt'

end
/* End of program _parsexlabel */

* Version 1.1 24Feb2019 Delete the unused -at- option.
* Version 1.2 20Apr2019 Add -addlabels- and -addlabopts- options
*       Enable user's -xlabel(,format(%9.3fc))- option to apply to -xlog-
*       Deny the -xscale(log)- option in favor of -xlog-
*       Set version to 13.1, but take advantage of version 15 opacity
* Version 1.2.1 20Apr2019 Suppress -barwidth- option in favor of -width-
* Version 1.3 12Dec2021 Change logic in lines 111-115 to conserve
*       the rounding of the minimum value, while assuring that
*       `start' is never greater than the minimum in the sample.
*       Also add a default -xtitle- when the graphed variable has no variable label
* Version 1.31 13Dec2021 Tweaked line 113
* Version 1.32 22Mar2023 Enable the over(overvariable) to be a string rather than numeric
*       Add option -clean- to -levelsof- command in line 40.  Add lines 49-57