1 |
// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 |
// Use of this source code is governed by a BSD-style license that can be |
3 |
// found in the LICENSE file. |
4 |
|
5 |
// This command-line program converts an effective-TLD data file in UTF-8 from |
6 |
// the format provided by Mozilla to the format expected by Chrome. This |
7 |
// program generates an intermediate file which is then used by gperf to |
8 |
// generate a perfect hash map. The benefit of this approach is that no time is |
9 |
// spent on program initialization to generate the map of this data. |
10 |
// |
11 |
// Running this program finds "effective_tld_names.dat" in the expected location |
12 |
// in the source checkout and generates "effective_tld_names.gperf" next to it. |
13 |
// |
14 |
// Any errors or warnings from this program are recorded in tld_cleanup.log. |
15 |
// |
16 |
// In particular, it |
17 |
// * Strips blank lines and comments, as well as notes for individual rules. |
18 |
// * Strips a single leading and/or trailing dot from each rule, if present. |
19 |
// * Logs a warning if a rule contains '!' or '*.' other than at the beginning |
20 |
// of the rule. (This also catches multiple ! or *. at the start of a rule.) |
21 |
// * Logs a warning if GURL reports a rule as invalid, but keeps the rule. |
22 |
// * Canonicalizes each rule's domain by converting it to a GURL and back. |
23 |
// * Adds explicit rules for true TLDs found in any rule. |
24 |
// * Marks entries in the file between "// ===BEGIN PRIVATE DOMAINS===" |
25 |
// and "// ===END PRIVATE DOMAINS===" as private. |
26 |
|
27 |
#include "base/at_exit.h" |
28 |
#include "base/command_line.h" |
29 |
#include "base/file_util.h" |
30 |
#include "base/files/file_path.h" |
31 |
#include "base/i18n/icu_util.h" |
32 |
#include "base/logging.h" |
33 |
#include "base/path_service.h" |
34 |
#include "base/process/memory.h" |
35 |
#include "net/tools/tld_cleanup/tld_cleanup_util.h" |
36 |
|
37 |
int main(int argc, const char* argv[]) { |
38 |
base::EnableTerminationOnHeapCorruption(); |
39 |
if (argc != 1) { |
40 |
fprintf(stderr, "Normalizes and verifies UTF-8 TLD data files\n"); |
41 |
fprintf(stderr, "Usage: %s\n", argv[0]); |
42 |
return 1; |
43 |
} |
44 |
|
45 |
// Manages the destruction of singletons. |
46 |
base::AtExitManager exit_manager; |
47 |
|
48 |
// Only use OutputDebugString in debug mode. |
49 |
#ifdef NDEBUG |
50 |
logging::LoggingDestination destination = logging::LOG_TO_FILE; |
51 |
#else |
52 |
logging::LoggingDestination destination = |
53 |
logging::LOG_TO_ALL; |
54 |
#endif |
55 |
|
56 |
base::CommandLine::Init(argc, argv); |
57 |
|
58 |
base::FilePath log_filename; |
59 |
PathService::Get(base::DIR_EXE, &log_filename); |
60 |
log_filename = log_filename.AppendASCII("tld_cleanup.log"); |
61 |
logging::LoggingSettings settings; |
62 |
settings.logging_dest = destination; |
63 |
settings.log_file = log_filename.value().c_str(); |
64 |
settings.delete_old = logging::DELETE_OLD_LOG_FILE; |
65 |
logging::InitLogging(settings); |
66 |
|
67 |
base::i18n::InitializeICU(); |
68 |
|
69 |
base::FilePath input_file; |
70 |
PathService::Get(base::DIR_SOURCE_ROOT, &input_file); |
71 |
input_file = input_file.Append(FILE_PATH_LITERAL("net")) |
72 |
.Append(FILE_PATH_LITERAL("base")) |
73 |
.Append(FILE_PATH_LITERAL( |
74 |
"registry_controlled_domains")) |
75 |
.Append(FILE_PATH_LITERAL("effective_tld_names.dat")); |
76 |
base::FilePath output_file; |
77 |
PathService::Get(base::DIR_SOURCE_ROOT, &output_file); |
78 |
output_file = output_file.Append(FILE_PATH_LITERAL("net")) |
79 |
.Append(FILE_PATH_LITERAL("base")) |
80 |
.Append(FILE_PATH_LITERAL( |
81 |
"registry_controlled_domains")) |
82 |
.Append(FILE_PATH_LITERAL( |
83 |
"effective_tld_names.gperf")); |
84 |
net::tld_cleanup::NormalizeResult result = |
85 |
net::tld_cleanup::NormalizeFile(input_file, output_file); |
86 |
if (result != net::tld_cleanup::kSuccess) { |
87 |
fprintf(stderr, |
88 |
"Errors or warnings processing file. See log in tld_cleanup.log."); |
89 |
} |
90 |
|
91 |
if (result == net::tld_cleanup::kError) |
92 |
return 1; |
93 |
return 0; |
94 |
} |