-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Initial check-in of app-ads.txt parsing libraries.
- Loading branch information
Showing
7 changed files
with
734 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,69 @@ | ||
# app-ads.txt-parser | ||
# app-ads.txt URL parsing reference implementation | ||
|
||
This repository contains a reference implementation of app-ads.txt parsing utilities, along with test cases for verifying compatibility. These packages are written in the Go programming language, although the code and unit tests can easily be ported to another language if needed. | ||
|
||
Currently included are two packages: | ||
|
||
* `appstoreparse`: an app store listing page HTML parser for extracting HTML `<meta>` tags containing app metadata | ||
* `urlcanonical`: a URL canonicalization package for converting app developer URLs to the corresponding app-ads.txt paths to crawl | ||
|
||
The repository also includes a sample crawl app written in Go, useful for testing the compatibility of an individual app store URL from the command line. | ||
|
||
## Set up environment | ||
|
||
If not already installed, download and install the Go distribution from https://golang.org/ | ||
|
||
## Running sample app | ||
|
||
To run the crawl sample app against an internal sample web server simulating an app listing page HTML file (`sample_app_store.html`), specify the desired port using the `--sample_file_server_port` flag. The app will crawl the app store URL, output the parse results, and then immediately terminate. | ||
|
||
``` | ||
cd examples/appadstxtcrawl | ||
go run sample_app.go --app_store_url=http://localhost:8081/sample_app_store.html --sample_file_server_port=8081 | ||
``` | ||
|
||
Sample output: | ||
|
||
``` | ||
Parsed metadata: | ||
Developer URL: https://sample.path.to/page | ||
Bundle ID: com.example.myapp | ||
Store ID: SKU12345 | ||
Derived app-ads.txt URLs: | ||
Registerable Domain URL: https://path.to/app-ads.txt | ||
Subdomain URL: https://sample.path.to/app-ads.txt | ||
``` | ||
|
||
The app can be run against any other app store URL. For example, this command line demonstrates crawling an app in the Google Play Store. | ||
|
||
``` | ||
cd examples/appadstxtcrawl | ||
go run sample_app.go --app_store_url=https://play.google.com/store/apps/details?id=com.google.android.apps.maps | ||
``` | ||
|
||
Sample output: | ||
|
||
``` | ||
Parsed metadata: | ||
Developer URL: http://maps.google.com/about/ | ||
Bundle ID: com.google.android.apps.maps | ||
Store ID: com.google.android.apps.maps | ||
Derived app-ads.txt URLs: | ||
Registerable Domain URL: http://google.com/app-ads.txt | ||
Subdomain URL: http://maps.google.com/app-ads.txt | ||
``` | ||
|
||
Note: if the app store URL does not provide the required HTML meta tags, the | ||
script will display an empty result such as the following: | ||
|
||
``` | ||
Parsed metadata: | ||
Developer URL: | ||
Bundle ID: | ||
Store ID: | ||
No developer URL found to parse. | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
package appstoreparse | ||
|
||
/* | ||
* Copyright 2018 IAB Tech Lab & OpenRTB Group | ||
* Copyright 2018 Google LLC | ||
* | ||
* Author: Curtis Light, Google | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
|
||
"golang.org/x/net/html" | ||
) | ||
|
||
// AppStoreMetadata is the set of "appstore:" <meta> tag values extracted from
// an app listing page. A field is left as the empty string when the matching
// tag was not found.
type AppStoreMetadata struct {
	// DeveloperURL is the content of the appstore:developer_url <meta> tag.
	DeveloperURL string

	// BundleID is the content of the appstore:bundle_id <meta> tag.
	BundleID string

	// StoreID is the content of the appstore:store_id <meta> tag.
	StoreID string
}
|
||
// ParseAppStorePageHTML accepts a string containing an HTML doc, returning | ||
// a struct containing the parsed "appstore:" meta tag values. | ||
func ParseAppStorePageHTML(htmlContent string) (*AppStoreMetadata, error) { | ||
result := AppStoreMetadata{} | ||
doc, err := html.Parse(strings.NewReader(htmlContent)) | ||
if err != nil { | ||
return nil, fmt.Errorf("Error running HTML parser: %v", err) | ||
} | ||
var handleNode func(*html.Node, bool) | ||
handleNode = func(n *html.Node, parentNoteIsHeadTag bool) { | ||
// Ignore any elements contained within the <body> tag. | ||
if n.Type == html.ElementNode && n.Data == "body" { | ||
return | ||
} | ||
|
||
if n.Type == html.ElementNode && n.Data == "meta" && parentNoteIsHeadTag { | ||
var nameAttribute, contentAttribute string | ||
for _, a := range n.Attr { | ||
if a.Key == "name" { | ||
nameAttribute = a.Val | ||
} | ||
if a.Key == "content" { | ||
contentAttribute = a.Val | ||
} | ||
} | ||
if nameAttribute == "appstore:developer_url" { | ||
result.DeveloperURL = contentAttribute | ||
} | ||
if nameAttribute == "appstore:bundle_id" { | ||
result.BundleID = contentAttribute | ||
} | ||
if nameAttribute == "appstore:store_id" { | ||
result.StoreID = contentAttribute | ||
} | ||
} | ||
|
||
// See if this element is a <head> tag to check if <meta> tags are | ||
// immediate children. | ||
currentNodeIsHeadTag := n.Type == html.ElementNode && n.Data == "head" | ||
for c := n.FirstChild; c != nil; c = c.NextSibling { | ||
handleNode(c, currentNodeIsHeadTag) | ||
} | ||
} | ||
handleNode(doc, false) | ||
return &result, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
package appstoreparse | ||
|
||
/* | ||
* Copyright 2018 IAB Tech Lab & OpenRTB Group | ||
* Copyright 2018 Google LLC | ||
* | ||
* Author: Curtis Light, Google | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
import ( | ||
"reflect" | ||
"testing" | ||
) | ||
|
||
// validBasicHTML is a complete, valid HTML5 document whose <head> contains | ||
// all three compliant appstore: meta tags (developer_url, bundle_id and | ||
// store_id), followed by a non-empty <body>. | ||
const validBasicHTML = ` | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<title> | ||
Example app listing | ||
</title> | ||
<meta charset="UTF-8"> | ||
<meta name="appstore:developer_url" content="https://www.path.to/page"> | ||
<meta name="appstore:bundle_id" content="com.example.myapp"> | ||
<meta name="appstore:store_id" content="SKU12345"> | ||
</head> | ||
<body> | ||
<h1> | ||
Example app | ||
</h1> | ||
</body> | ||
</html>` | ||
|
||
// validBasicHTMLUppercase is the same document as the basic fixture but with | ||
// every tag name and attribute name written in uppercase; attribute values | ||
// are unchanged. It confirms that parsing still works correctly in that case. | ||
const validBasicHTMLUppercase = ` | ||
<!DOCTYPE HTML> | ||
<HTML LANG="en"> | ||
<HEAD> | ||
<TITLE> | ||
Example app listing | ||
</TITLE> | ||
<META CHARSET="UTF-8"> | ||
<META NAME="appstore:developer_url" CONTENT="https://www.path.to/page"> | ||
<META NAME="appstore:bundle_id" CONTENT="com.example.myapp"> | ||
<META NAME="appstore:store_id" CONTENT="SKU12345"> | ||
</HEAD> | ||
<BODY> | ||
<H1> | ||
Example app | ||
</H1> | ||
</BODY> | ||
</HTML>` | ||
|
||
// validTruncatedHTML is a valid HTML5 document with compliant meta tags that | ||
// is cut off right after the closing </head> tag: it has no <body> element | ||
// and no closing </html> tag. | ||
const validTruncatedHTML = ` | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<title> | ||
Example app listing | ||
</title> | ||
<meta charset="UTF-8"> | ||
<meta name="appstore:developer_url" content="https://www.path.to/page"> | ||
<meta name="appstore:bundle_id" content="com.example.myapp"> | ||
<meta name="appstore:store_id" content="SKU12345"> | ||
</head>` | ||
|
||
// metaInBodyNonCompliant places the appstore: <meta> tags inside the <body> | ||
// element instead of <head>. The tags must reside within the <head> tag, so | ||
// the parser is expected to ignore them. | ||
const metaInBodyNonCompliant = ` | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<title> | ||
Example app listing | ||
</title> | ||
<meta charset="UTF-8"> | ||
</head> | ||
<body> | ||
<meta name="appstore:developer_url" content="https://www.path.to/page"> | ||
<meta name="appstore:bundle_id" content="com.example.myapp"> | ||
<meta name="appstore:store_id" content="SKU12345"> | ||
<h1> | ||
Example app | ||
</h1> | ||
</body> | ||
</html>` | ||
|
||
var testScenarios = []struct { | ||
input string | ||
wantMetadata AppStoreMetadata | ||
}{ | ||
{ | ||
validBasicHTML, | ||
AppStoreMetadata{ | ||
DeveloperURL: "https://www.path.to/page", | ||
BundleID: "com.example.myapp", | ||
StoreID: "SKU12345", | ||
}, | ||
}, | ||
{ | ||
validBasicHTMLUppercase, | ||
AppStoreMetadata{ | ||
DeveloperURL: "https://www.path.to/page", | ||
BundleID: "com.example.myapp", | ||
StoreID: "SKU12345", | ||
}, | ||
}, | ||
{ | ||
validTruncatedHTML, | ||
AppStoreMetadata{ | ||
DeveloperURL: "https://www.path.to/page", | ||
BundleID: "com.example.myapp", | ||
StoreID: "SKU12345", | ||
}, | ||
}, | ||
{ | ||
metaInBodyNonCompliant, | ||
AppStoreMetadata{}, | ||
}, | ||
{ | ||
"", // Empty HTML document | ||
AppStoreMetadata{}, | ||
}, | ||
} | ||
|
||
func TestParseAppStorePageHTML(t *testing.T) { | ||
for _, scenario := range testScenarios { | ||
t.Run(scenario.input, func(t *testing.T) { | ||
haveMetadata, err := ParseAppStorePageHTML(scenario.input) | ||
if !reflect.DeepEqual(*haveMetadata, scenario.wantMetadata) { | ||
t.Errorf("have [%v] want [%v]", haveMetadata, scenario.wantMetadata) | ||
} | ||
|
||
if err != nil { | ||
t.Error("Received unexpected error") | ||
} | ||
}) | ||
} | ||
} |
Oops, something went wrong.