Skip to content

Commit

Permalink
Merge pull request #21 from lsms-worldbank/list-value-labels-that-do-…
Browse files Browse the repository at this point in the history
…not-match-pattern

List value labels that do not match pattern
  • Loading branch information
kbjarkefur authored Jan 17, 2024
2 parents 6236a1f + 2003156 commit a256c0c
Show file tree
Hide file tree
Showing 6 changed files with 583 additions and 1 deletion.
112 changes: 112 additions & 0 deletions src/ado/lbl_list_matching_vals.ado
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
*! version XX XXXXXXXXX ADAUTHORNAME ADCONTACTINFO

cap program drop lbl_list_matching_vals
program define lbl_list_matching_vals, rclass

    /*
    List value labels that have at least one label text matching a regular
    expression (or, with -negate-, no label text matching it), together with
    the variables those value labels are attached to.

    Options
      pattern(string)   regular expression passed to ustrregexm()
      negate            report in-scope value labels with NO matching text
      verbose           additionally -label list- the reported value labels
      varlist(varlist)  restrict the scope to these variables
                        (default: all variables in memory)

    Returned results
      r(lbl_count)      number of value labels reported
      r(val_lbl_list)   names of the value labels reported
      r(var_count)      number of variables carrying those value labels
      r(varlist)        names of those variables
    */

    * frames are used below, and they were introduced in Stata 16
    version 16

    syntax, pattern(string) [NEGate VERbose Varlist(varlist)]

    qui {

        * variables in scope that have a value label attached;
        * with an empty `varlist', ds considers every variable in memory
        ds `varlist', has(vallabel)
        local vars_w_val_lbl "`r(varlist)'"

        * names of the value labels attached to the in-scope variables,
        * de-duplicated because several variables may share one value label
        local val_lbls_for_varlist ""
        foreach var of local vars_w_val_lbl {
            local val_lbl_curr_var : value label `var'
            local val_lbls_for_varlist "`val_lbls_for_varlist' `val_lbl_curr_var'"
        }
        local val_lbls_for_varlist : list uniq val_lbls_for_varlist

        * work in a copy of the CURRENT frame (not necessarily "default")
        * so that the data can be replaced by a data set of labels
        local source_frame "`c(frame)'"
        tempname val_lbls
        frame copy `source_frame' `val_lbls'

        * local macros belong to the program, not to a frame, so everything
        * defined inside the frame block remains available after it
        local val_lbls_w_matching_val ""
        local val_lbls_matching_in_varlist ""
        frame `val_lbls' {

            * create a data set of labels: one observation per label entry
            uselabel, clear var

            * nothing to search when no value label is defined
            if (_N > 0) {

                * value labels with at least one matching label text;
                * compound quotes so a pattern may itself contain a quote
                levelsof lname if ustrregexm(label, `"`pattern'"'), ///
                    local(val_lbls_w_matching_val) clean

                * all defined value labels
                levelsof lname, local(all_val_lbls) clean

                * candidates: defined value labels attached to in-scope variables
                local val_lbls_in_scope : list val_lbls_for_varlist & all_val_lbls

                if (mi("`negate'")) {
                    * in-scope value labels that match
                    local val_lbls_matching_in_varlist : list val_lbls_in_scope & val_lbls_w_matching_val
                }
                else {
                    * complement taken WITHIN the scope: in-scope value labels
                    * that do not match. Labels outside the scope are never
                    * reported, whether or not they match.
                    local val_lbls_matching_in_varlist : list val_lbls_in_scope - val_lbls_w_matching_val
                }

            }

        }

        * compile list of variables whose value label is among those reported
        local vars_w_matching_val_lbl ""
        if (!mi("`val_lbls_matching_in_varlist'")) {
            * list variables carrying one of the value label names piped into `has()'
            ds, has(vallabel `val_lbls_matching_in_varlist')
            local vars_w_matching_val_lbl "`r(varlist)'"
            * restrict to variables in the varlist with labels
            local vars_w_matching_val_lbl : list vars_w_matching_val_lbl & vars_w_val_lbl
        }

        * compute the number of matches
        local n_matching_val_lbls : list sizeof val_lbls_matching_in_varlist
        local n_matching_vars : list sizeof vars_w_matching_val_lbl

        * report on findings
        if (`n_matching_val_lbls' == 0) {
            noi: di as result "No matching value labels found"
        }
        else {
            * print basic results message
            noi: di as result "Matching value labels found."
            noi: di as result "`n_matching_val_lbls' value labels attached to `n_matching_vars' variables."
            noi: di as result "Value labels: `val_lbls_matching_in_varlist'"
            noi: di as result "Variables: `vars_w_matching_val_lbl'"
            * if verbose mode, print out matching value label sets
            if (!mi("`verbose'")) {
                noi: label list `val_lbls_matching_in_varlist'
            }
        }

        * return results
        return local lbl_count "`n_matching_val_lbls'"
        return local val_lbl_list "`val_lbls_matching_in_varlist'"
        return local var_count "`n_matching_vars'"
        return local varlist "`vars_w_matching_val_lbl'"

    }

end
2 changes: 1 addition & 1 deletion src/dev/run-adodown-util.do
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
}
* Fill in your root path here
if "`c(username)'" == "wb393438" {
global clone "C:\Users\wb393438\stata_funs\labeller"
global clone "C:\Users\wb393438\stata_funs\labeller\labeller"
}

// ad_setup, adf("${clone}") ///
Expand Down
2 changes: 2 additions & 0 deletions src/labeller.pkg
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ f ado/lbl_list_no_varlbl.ado
f ado/lbl_replace_pipe.ado
f ado/lbl_assert_no_pipes.ado
f ado/lbl_list_pipes.ado
f ado/lbl_list_matching_vals.ado
f ado/labeller.ado
f ado/lbl_assert_no_long_varlbl.ado
f ado/lbl_list_long_varlbl.ado
f ado/lbl_list_matching_vars.ado

*** helpfiles
f sthlp/lbl_list_matching_vals.sthlp
f sthlp/lbl_assert_varlbls.sthlp
f sthlp/lbl_list_no_varlbl.sthlp
f sthlp/lbl_assert_no_long_varlbl.sthlp
Expand Down
110 changes: 110 additions & 0 deletions src/mdhlp/lbl_list_matching_vals.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# Title

__lbl_list_matching_vals__ - List value labels whose labels match a pattern.

# Syntax

__lbl_list_matching_vals__, __pattern__(_string_) [__**neg**ate__ __**ver**bose__ __**v**arlist__(_varlist_)]

| _options_ | Description |
|-----------|-------------|
| __pattern__(_string_) | Pattern to find in an answer option. Provide either a substring or a regular expression. |
| __**neg**ate__ | Inverts the search, returning value labels that do __not__ match the pattern. |
| __**ver**bose__ | Print out labels that match query. Output corresponds to `label list lblnames`. |
| __**v**arlist__(_varlist_) | Restrict the scope of variables to consider |

# Description

While Stata offers some tools for searching the content of variable labels (e.g. `lookfor`), it does not have any methods for similarly searching the contents of value labels.

This command aims to fill this gap by:

- Searching labels in value labels for a pattern
- Identifying value labels that contain labels of interest
- Compiling variables that have these labels of interest attached

This command can be particularly useful for checking that value labels do (or do not) contain patterns of interest. Consider for example:

- Confirming that value labels contain (or, e.g., do not contain) a given pattern
- Identifying value labels that deviate from standards

# Options

__pattern__(_string_) provides the text pattern to find in the contents of value labels. Rather than being the traditional Stata glob pattern, this pattern is a sub-string or a regular expression.

__**neg**ate__ inverts the search, returning value labels that do __not__ match the pattern. In isolation, `pattern("my_text")` looks for value labels containing `"my_text"`. With `negate`, `pattern("my_text")` looks instead for value labels that do not contain `"my_text"`.

__**ver**bose__ manages how much output is printed. If the `verbose` option is not provided, `lbl_list_matching_vals` reports on whether any matches were found--and, if so, how many value labels match and how many variables the matching value labels describe. If the `verbose` option is specified, the command will additionally print the contents of the matching value labels as a convenience.

__**v**arlist__(_varlist_) restricts the scope of the search to the user-provided variable list. By default, the command searches for matches in all variables in memory. With __varlist__(), the scope of the search can be narrowed.

# Examples

## Example 1: contain a pattern

```
* create some fake data
gen var1 = .
gen var2 = .
gen var3 = .
gen var4 = .
* create some value labels
label define var1_lbl 1 "Yes" 2 "No"
label define var2_lbl 1 "Oui" 2 "Non" 3 "Oui, oui"
label define var4_lbl 1 "Oui" 2 "Non"
* apply those labels to some, but not all, variables
label values var1 var1_lbl
label values var2 var2_lbl
label values var4 var4_lbl
* find value labels with "Oui" and/or "oui" in at least one constituent label
lbl_list_matching_vals, pattern("[Oo]ui")
* find value labels and print out the contents of the label, for convenience
* i.e., to avoid the next step that many users might logically make:
* `label list matching_lbl`
lbl_list_matching_vals, pattern("[Oo]ui") verbose
```

## Example 2: do not contain a pattern

```
* find value labels that do not contain a certain pattern
* for example, no "Oui"/"oui" in yes/no labels from a French-language survey
lbl_list_matching_vals, pattern("[Oo]ui") negate
```

## Example 3: contain only a certain set of characters

```
* create some value labels
label drop _all
* var1_lbl var2_lbl var4_lbl
label define var1_lbl 1 "YES" 2 "NO"
label define var2_lbl 1 "Yes" 2 "No"
label define var3_lbl 1 "yes" 2 "no"
label define var4_lbl 1 "Où" 2 "Là"
* attach them to variables created above
label values var1 var1_lbl
label values var2 var2_lbl
label values var3 var3_lbl
label values var4 var4_lbl
* contains no lower-case characters
lbl_list_matching_vals, pattern("[:lower:]") negate
* contains no French characters
lbl_list_matching_vals, pattern("[àâäÀÂÄéèêëÉÈÊËîïôöÔÖùûüçÇ]") negate
```

# Feedback, bug reports and contributions

Read more about these commands on [this repo](https://github.com/lsms-worldbank/labeller) where this package is developed. Please provide any feedback by [opening an issue](https://github.com/lsms-worldbank/labeller/issues). PRs with suggestions for improvements are also greatly appreciated.

# Authors

LSMS Team, The World Bank [email protected]
122 changes: 122 additions & 0 deletions src/sthlp/lbl_list_matching_vals.sthlp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
{smcl}
{* 01 Jan 1960}{...}
{hline}
{pstd}help file for {hi:lbl_list_matching_vals}{p_end}
{hline}

{title:Title}

{phang}{bf:lbl_list_matching_vals} - List value labels whose labels match a pattern.
{p_end}

{title:Syntax}

{phang}{bf:lbl_list_matching_vals}, {bf:pattern}({it:string}) [{bf:{ul:neg}ate} {bf:{ul:ver}bose} {bf:{ul:v}arlist}({it:varlist})]
{p_end}

{synoptset 16}{...}
{synopthdr:options}
{synoptline}
{synopt: {bf:pattern}({it:string})}Pattern to find in an answer option. Provide either a substring or a regular expression.{p_end}
{synopt: {bf:{ul:neg}ate}}Inverts the search, returning value labels that do {bf:not} match the pattern.{p_end}
{synopt: {bf:{ul:ver}bose}}Print out labels that match query. Output corresponds to {inp:label list lblnames}.{p_end}
{synopt: {bf:{ul:v}arlist}({it:varlist})}Restrict the scope of variables to consider{p_end}
{synoptline}

{title:Description}

{pstd}While Stata offers some tools for searching the content of variable labels (e.g. {inp:lookfor}), it does not have any methods for similarly searching the contents of value labels.
{p_end}

{pstd}This command aims to fill this gap by:
{p_end}

{pstd}- Searching labels in value labels for a pattern
- Identifying value labels that contain labels of interest
- Compiling variables that have these labels of interest attached
{p_end}

{pstd}This command can be particularly useful for checking that value labels do (or do not) contain patterns of interest. Consider for example:
{p_end}

{pstd}- Confirming that value labels contain (or, e.g., do not contain) a given pattern
- Identifying value labels that deviate from standards
{p_end}

{title:Options}

{pstd}{bf:pattern}({it:string}) provides the text pattern to find in the contents of value labels. Rather than being the traditional Stata glob pattern, this pattern is a sub-string or a regular expression.
{p_end}

{pstd}{bf:negate} inverts the search, returning value labels that do {bf:not} match the pattern. In isolation, {inp:pattern({c 34}my_text{c 34})} looks for value labels containing {inp:{c 34}my_text{c 34}}. With {inp:negate}, {inp:pattern({c 34}my_text{c 34})} looks instead for value labels that do not contain {inp:{c 34}my_text{c 34}}.
{p_end}

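{pstd}{bf:verbose} manages how much output is printed. If the {inp:verbose} option is not provided, {inp:lbl_list_matching_vals} reports on whether any matches were found--and, if so, how many value labels match and how many variables the matching value labels describe. If the {inp:verbose} option is specified, the command will additionally print the contents of the matching value labels as a convenience.
{p_end}

{pstd}{bf:varlist}({it:varlist}) restricts the scope of the search to the user-provided variable list. By default, the command searches for matches in all variables in memory. With {bf:varlist}(), the scope of the search can be narrowed.
{p_end}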

{title:Examples}

{dlgtab:Example 1: contain a pattern}

{input}{space 8}* create some fake data
{space 8}gen var1 = .
{space 8}gen var2 = .
{space 8}gen var3 = .
{space 8}gen var4 = .
{space 8}
{space 8}* create some value labels
{space 8}label define var1_lbl 1 "Yes" 2 "No"
{space 8}label define var2_lbl 1 "Oui" 2 "Non" 3 "Oui, oui"
{space 8}label define var4_lbl 1 "Oui" 2 "Non"
{space 8}
{space 8}* apply those labels to some, but not all, variables
{space 8}label values var1 var1_lbl
{space 8}label values var2 var2_lbl
{space 8}label values var4 var4_lbl
{space 8}
{space 8}* find value labels with "Oui" and/or "oui" in at least one constituent label
{space 8}lbl_list_matching_vals, pattern("[Oo]ui")
{space 8}
{space 8}* find value labels and print out the contents of the label, for convenience
{space 8}* i.e., to avoid the next step that many users might logically make:
{space 8}* `label list matching_lbl`
{space 8}lbl_list_matching_vals, pattern("[Oo]ui") verbose
{text}
{dlgtab:Example 2: do not contain a pattern}

{input}{space 8}* find value labels that do not contain a certain pattern
{space 8}* for example, no "Oui"/"oui" in yes/no labels from a French-language survey
{space 8}lbl_list_matching_vals, pattern("[Oo]ui") negate
{text}
{dlgtab:Example 3: contain only a certain set of characters}

{input}{space 8}* create some value labels
{space 8}label drop _all
{space 8}* var1_lbl var2_lbl var4_lbl
{space 8}label define var1_lbl 1 "YES" 2 "NO"
{space 8}label define var2_lbl 1 "Yes" 2 "No"
{space 8}label define var3_lbl 1 "yes" 2 "no"
{space 8}label define var4_lbl 1 "Où" 2 "Là"
{space 8}
{space 8}* attach them to variables created above
{space 8}label values var1 var1_lbl
{space 8}label values var2 var2_lbl
{space 8}label values var3 var3_lbl
{space 8}label values var4 var4_lbl
{space 8}
{space 8}* contains no lower-case characters
{space 8}lbl_list_matching_vals, pattern("[:lower:]") negate
{space 8}
{space 8}* contains no French characters
{space 8}lbl_list_matching_vals, pattern("[àâäÀÂÄéèêëÉÈÊËîïôöÔÖùûüçÇ]") negate
{space 8}
{text}
{title:Feedback, bug reports and contributions}

{pstd}Read more about these commands on {browse "https://github.com/lsms-worldbank/labeller":this repo} where this package is developed. Please provide any feedback by {browse "https://github.com/lsms-worldbank/labeller/issues":opening an issue}. PRs with suggestions for improvements are also greatly appreciated.
{p_end}

{title:Authors}

{pstd}LSMS Team, The World Bank [email protected]
{p_end}
Loading

0 comments on commit a256c0c

Please sign in to comment.