-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #21 from lsms-worldbank/list-value-labels-that-do-…
…not-match-pattern List value labels that do not match pattern
- Loading branch information
Showing
6 changed files
with
583 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
*! version XX XXXXXXXXX ADAUTHORNAME ADCONTACTINFO | ||
|
||
cap program drop lbl_list_matching_vals
program define lbl_list_matching_vals, rclass

  * Purpose:
  *   Find value labels that have at least one label text matching
  *   pattern() -- or, with negate, no matching label text at all -- and
  *   the variables those value labels are attached to.
  *
  * Options:
  *   pattern(string)   regular expression (or plain substring) searched for
  *                     in label text with ustrregexm()
  *   negate            report value labels with NO matching label text
  *   verbose           additionally -label list- the value labels found
  *   varlist(varlist)  restrict the search to these variables
  *                     (default: all variables in memory)
  *
  * Returned results:
  *   r(lbl_count)      number of value labels found
  *   r(val_lbl_list)   names of the value labels found
  *   r(var_count)      number of variables carrying those value labels
  *   r(varlist)        names of those variables

  * frames are used below; they are only available from Stata 16 onwards,
  * so declare that version to fail early and clearly on older releases
  version 16

  syntax, pattern(string) [NEGate VERbose Varlist(varlist)]

  qui {

    * default the scope of the search to every variable in memory
    if (mi("`varlist'")) {
      d, varlist
      * copy the macro directly rather than evaluating it as an expression
      local varlist "`r(varlist)'"
    }

    * variables in scope that have a value label attached
    ds `varlist', has(vallabel)
    local vars_w_val_lbl "`r(varlist)'"

    * distinct value label names attached to the variables in scope
    local val_lbls_for_varlist ""
    foreach var of local vars_w_val_lbl {
      local val_lbl_curr_var : value label `var'
      local val_lbls_for_varlist "`val_lbls_for_varlist' `val_lbl_curr_var'"
    }
    local val_lbls_for_varlist : list uniq val_lbls_for_varlist

    * work in a copy of the current frame so that the data can be replaced
    * by a data set of labels without touching the user's data.
    * c(frame) is used because the user may not be working in "default".
    local val_lbls_w_matching_val ""
    local all_val_lbls ""
    tempname val_lbls
    frame copy `c(frame)' `val_lbls'
    frame `val_lbls' {

      * create a data set of labels: one observation per label text
      uselabel, clear var

      * nothing to search if no value labels are defined
      if (_N > 0) {

        * value labels with at least one label text matching the pattern;
        * compound quotes keep a pattern containing " from breaking
        levelsof lname if ustrregexm(label, `"`pattern'"'), ///
          local(val_lbls_w_matching_val) clean

        * every value label defined in the data
        levelsof lname, local(all_val_lbls) clean

      }

    }

    * local macros belong to the program, not to a frame, so the lists
    * captured above remain available here
    if (mi("`negate'")) {
      * labels in scope that contain a match
      local val_lbls_matching_in_varlist : list val_lbls_for_varlist & val_lbls_w_matching_val
    }
    else {
      * labels in scope that are actually defined ...
      local val_lbls_in_scope : list val_lbls_for_varlist & all_val_lbls
      * ... minus those that contain a match: the complement within scope
      local val_lbls_matching_in_varlist : list val_lbls_in_scope - val_lbls_w_matching_val
    }

    * variables in scope that carry one of the selected value labels
    local vars_w_matching_val_lbl ""
    if (!mi("`val_lbls_matching_in_varlist'")) {
      * label names are passed to has() as the list of labels to look for
      ds, has(vallabel `val_lbls_matching_in_varlist')
      local vars_w_matching_val_lbl "`r(varlist)'"
      * restrict to variables in the varlist with labels
      local vars_w_matching_val_lbl : list vars_w_matching_val_lbl & vars_w_val_lbl
    }

    * compute the number of matches
    local n_matching_val_lbls : list sizeof val_lbls_matching_in_varlist
    local n_matching_vars : list sizeof vars_w_matching_val_lbl

    * report on findings
    if (`n_matching_val_lbls' == 0) {
      noi: di as result "No matching value labels found"
    }
    else {
      * print basic results message
      noi: di as result "Matching value labels found."
      noi: di as result "`n_matching_val_lbls' value labels attached to `n_matching_vars' variables."
      noi: di as result "Value labels: `val_lbls_matching_in_varlist'"
      noi: di as result "Variables: `vars_w_matching_val_lbl'"
      * if verbose mode, print out matching value label sets
      if (!mi("`verbose'")) {
        noi: label list `val_lbls_matching_in_varlist'
      }
    }

    * return results
    return local lbl_count "`n_matching_val_lbls'"
    return local val_lbl_list "`val_lbls_matching_in_varlist'"
    return local var_count "`n_matching_vars'"
    return local varlist "`vars_w_matching_val_lbl'"

  }

end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
# Title | ||
|
||
__lbl_list_matching_vals__ - List value labels whose labels match a pattern. | ||
|
||
# Syntax | ||
|
||
__lbl_list_matching_vals__, __pattern__(_string_) [__**neg**ate__ __**ver**bose__ __**v**arlist__(_varlist_)] | ||
|
||
| _options_ | Description | | ||
|-----------|-------------| | ||
| __pattern__(_string_) | Pattern to find in an answer option. Provide either a substring or a regular expression. | | ||
| __**neg**ate__ | Inverts the search, returning value labels that do __not__ match the pattern. | | ||
| __**ver**bose__ | Print out labels that match query. Output corresponds to `label list lblnames`. | | ||
| __**v**arlist__(_varlist_) | Restrict the scope of variables to consider | | ||
|
||
# Description | ||
|
||
While Stata offers some tools for searching the content of variable labels (e.g. `lookfor`), it does not have any methods for similarly searching the contents of value labels. | ||
|
||
This command aims to fill this gap by: | ||
|
||
- Searching labels in value labels for a pattern | ||
- Identifying value labels that contain labels of interest | ||
- Compiling variables that have these labels of interest attached | ||
|
||
This command can be particularly useful for checking that value labels do (not) contain patterns of interest. Consider for example: | ||
|
||
- Confirming that value labels contain (or, with `negate`, do not contain) a given pattern | ||
- Identifying value labels that deviate from standards | ||
|
||
# Options | ||
|
||
__pattern__(_string_) provides the text pattern to find in the contents of value labels. Rather than the traditional Stata glob pattern, this pattern is a sub-string or a regular expression. | ||
|
||
__**neg**ate__ inverts the search, returning value labels that do __not__ match the pattern. In isolation, `pattern("my_text")` looks for value labels containing `"my_text"`. With `negate`, `pattern("my_text")` looks instead for value labels that do not contain `"my_text"`. | ||
|
||
__**ver**bose__ manages how much output is printed. If the `verbose` option is not provided, `lbl_list_matching_vals` reports on whether any matches were found--and, if so, how many value labels match and how many variables the matching value labels describe. If the `verbose` option is specified, the command will additionally print the contents of the matching value labels as a convenience. | ||
|
||
__**v**arlist__(_varlist_) restricts the scope of the search to the user-provided variable list. By default, the command searches for matches in all variables in memory. With __varlist__(), the scope of the search can be narrowed. | ||
|
||
# Examples | ||
|
||
## Example 1: contain a pattern | ||
|
||
``` | ||
* create some fake data | ||
gen var1 = . | ||
gen var2 = . | ||
gen var3 = . | ||
gen var4 = . | ||
* create some value labels | ||
label define var1_lbl 1 "Yes" 2 "No" | ||
label define var2_lbl 1 "Oui" 2 "Non" 3 "Oui, oui" | ||
label define var4_lbl 1 "Oui" 2 "Non" | ||
* apply those labels to some, but not all, variables | ||
label values var1 var1_lbl | ||
label values var2 var2_lbl | ||
label values var4 var4_lbl | ||
* find value labels with "Oui" and/or "oui" in at least one constituent label | ||
lbl_list_matching_vals, pattern("[Oo]ui") | ||
* find value labels and print out the contents of the label, for convenience | ||
* i.e., to avoid the next step that many users might logically make: | ||
* `label list matching_lbl` | ||
lbl_list_matching_vals, pattern("[Oo]ui") verbose | ||
``` | ||
|
||
## Example 2: do not contain a pattern | ||
|
||
``` | ||
* find value labels that do not contain a certain pattern | ||
* for example, no "Oui"/"oui" in yes/no labels from a French-language survey | ||
lbl_list_matching_vals, pattern("[Oo]ui") negate | ||
``` | ||
|
||
## Example 3: contain only a certain set of characters | ||
|
||
``` | ||
* create some value labels | ||
label drop _all | ||
* var1_lbl var2_lbl var4_lbl | ||
label define var1_lbl 1 "YES" 2 "NO" | ||
label define var2_lbl 1 "Yes" 2 "No" | ||
label define var3_lbl 1 "yes" 2 "no" | ||
label define var4_lbl 1 "Où" 2 "Là" | ||
* attach them to variables created above | ||
label values var1 var1_lbl | ||
label values var2 var2_lbl | ||
label values var3 var3_lbl | ||
label values var4 var4_lbl | ||
* contains no lower-case characters | ||
lbl_list_matching_vals, pattern("[:lower:]") negate | ||
* contains no French characters | ||
lbl_list_matching_vals, pattern("[àâäÀÂÄéèêëÉÈÊËîïôöÔÖùûüçÇ]") negate | ||
``` | ||
|
||
# Feedback, bug reports and contributions | ||
|
||
Read more about these commands on [this repo](https://github.com/lsms-worldbank/labeller) where this package is developed. Please provide any feedback by [opening an issue](https://github.com/lsms-worldbank/labeller/issues). PRs with suggestions for improvements are also greatly appreciated. | ||
|
||
# Authors | ||
|
||
LSMS Team, The World Bank [email protected] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
{smcl} | ||
{* 01 Jan 1960}{...} | ||
{hline} | ||
{pstd}help file for {hi:lbl_list_matching_vals}{p_end} | ||
{hline} | ||
|
||
{title:Title} | ||
|
||
{phang}{bf:lbl_list_matching_vals} - List value labels whose labels match a pattern. | ||
{p_end} | ||
|
||
{title:Syntax} | ||
|
||
{phang}{bf:lbl_list_matching_vals}, {bf:pattern}({it:string}) [{bf:{ul:neg}ate} {bf:{ul:ver}bose} {bf:{ul:v}arlist}({it:varlist})] | ||
{p_end} | ||
|
||
{synoptset 16}{...} | ||
{synopthdr:options} | ||
{synoptline} | ||
{synopt: {bf:pattern}({it:string})}Pattern to find in an answer option. Provide either a substring or a regular expression.{p_end} | ||
{synopt: {bf:{ul:neg}ate}}Inverts the search, returning value labels that do {bf:not} match the pattern.{p_end} | ||
{synopt: {bf:{ul:ver}bose}}Print out labels that match query. Output corresponds to {inp:label list lblnames}.{p_end} | ||
{synopt: {bf:{ul:v}arlist}({it:varlist})}Restrict the scope of variables to consider{p_end} | ||
{synoptline} | ||
|
||
{title:Description} | ||
|
||
{pstd}While Stata offers some tools for searching the content of variable labels (e.g. {inp:lookfor}), it does not have any methods for similarly searching the contents of value labels. | ||
{p_end} | ||
|
||
{pstd}This command aims to fill this gap by: | ||
{p_end} | ||
|
||
{pstd}- Searching labels in value labels for a pattern | ||
- Identifying value labels that contain labels of interest | ||
- Compiling variables that have these labels of interest attached | ||
{p_end} | ||
|
||
{pstd}This command can be particularly useful for checking that value labels do (not) contain patterns of interest. Consider for example: | ||
{p_end} | ||
|
||
{pstd}- Confirming that value labels contain (or, with {inp:negate}, do not contain) a given pattern | ||
- Identifying value labels that deviate from standards | ||
{p_end} | ||
|
||
{title:Options} | ||
|
||
{pstd}{bf:pattern}({it:string}) provides the text pattern to find in the contents of value labels. Rather than the traditional Stata glob pattern, this pattern is a sub-string or a regular expression. | ||
{p_end} | ||
|
||
{pstd}{bf:negate} inverts the search, returning value labels that do {bf:not} match the pattern. In isolation, {inp:pattern({c 34}my_text{c 34})} looks for value labels containing {inp:{c 34}my_text{c 34}}. With {inp:negate}, {inp:pattern({c 34}my_text{c 34})} looks instead for value labels that do not contain {inp:{c 34}my_text{c 34}}. | ||
{p_end} | ||
|
||
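{pstd}{bf:verbose} manages how much output is printed. If the {inp:verbose} option is not provided, {inp:lbl_list_matching_vals} reports on whether any matches were found--and, if so, how many value labels match and how many variables the matching value labels describe. If the {inp:verbose} option is specified, the command will additionally print the contents of the matching value labels as a convenience. | ||
{p_end} | ||
|
||
{pstd}{bf:varlist}({it:varlist}) restricts the scope of the search to the user-provided variable list. By default, the command searches for matches in all variables in memory. With {bf:varlist}(), the scope of the search can be narrowed. | ||
{p_end} | ||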
|
||
{title:Examples} | ||
|
||
{dlgtab:Example 1: contain a pattern} | ||
|
||
{input}{space 8}* create some fake data | ||
{space 8}gen var1 = . | ||
{space 8}gen var2 = . | ||
{space 8}gen var3 = . | ||
{space 8}gen var4 = . | ||
{space 8} | ||
{space 8}* create some value labels | ||
{space 8}label define var1_lbl 1 "Yes" 2 "No" | ||
{space 8}label define var2_lbl 1 "Oui" 2 "Non" 3 "Oui, oui" | ||
{space 8}label define var4_lbl 1 "Oui" 2 "Non" | ||
{space 8} | ||
{space 8}* apply those labels to some, but not all, variables | ||
{space 8}label values var1 var1_lbl | ||
{space 8}label values var2 var2_lbl | ||
{space 8}label values var4 var4_lbl | ||
{space 8} | ||
{space 8}* find value labels with "Oui" and/or "oui" in at least one constituent label | ||
{space 8}lbl_list_matching_vals, pattern("[Oo]ui") | ||
{space 8} | ||
{space 8}* find value labels and print out the contents of the label, for convenience | ||
{space 8}* i.e., to avoid the next step that many users might logically make: | ||
{space 8}* `label list matching_lbl` | ||
{space 8}lbl_list_matching_vals, pattern("[Oo]ui") verbose | ||
{text} | ||
{dlgtab:Example 2: do not contain a pattern} | ||
|
||
{input}{space 8}* find value labels that do not contain a certain pattern | ||
{space 8}* for example, no "Oui"/"oui" in yes/no labels from a French-language survey | ||
{space 8}lbl_list_matching_vals, pattern("[Oo]ui") negate | ||
{text} | ||
{dlgtab:Example 3: contain only a certain set of characters} | ||
|
||
{input}{space 8}* create some value labels | ||
{space 8}label drop _all | ||
{space 8}* var1_lbl var2_lbl var4_lbl | ||
{space 8}label define var1_lbl 1 "YES" 2 "NO" | ||
{space 8}label define var2_lbl 1 "Yes" 2 "No" | ||
{space 8}label define var3_lbl 1 "yes" 2 "no" | ||
{space 8}label define var4_lbl 1 "Où" 2 "Là" | ||
{space 8} | ||
{space 8}* attach them to variables created above | ||
{space 8}label values var1 var1_lbl | ||
{space 8}label values var2 var2_lbl | ||
{space 8}label values var3 var3_lbl | ||
{space 8}label values var4 var4_lbl | ||
{space 8} | ||
{space 8}* contains no lower-case characters | ||
{space 8}lbl_list_matching_vals, pattern("[:lower:]") negate | ||
{space 8} | ||
{space 8}* contains no French characters | ||
{space 8}lbl_list_matching_vals, pattern("[àâäÀÂÄéèêëÉÈÊËîïôöÔÖùûüçÇ]") negate | ||
{space 8} | ||
{text} | ||
{title:Feedback, bug reports and contributions} | ||
|
||
{pstd}Read more about these commands on {browse "https://github.com/lsms-worldbank/labeller":this repo} where this package is developed. Please provide any feedback by {browse "https://github.com/lsms-worldbank/labeller/issues":opening an issue}. PRs with suggestions for improvements are also greatly appreciated. | ||
{p_end} | ||
|
||
{title:Authors} | ||
|
||
{pstd}LSMS Team, The World Bank [email protected] | ||
{p_end} |
Oops, something went wrong.