Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ Cargo.lock
credentials.json
*-engine.json
.cargo
# Mentat database and journal files
*.db*
# Sqlite database and journal files
*.sqlite*
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ members = [
"sync15-adapter",
"sync15/passwords",
"sync15/passwords/ffi",
"places-tool"
]

# For RSA keys cloning. Remove once openssl 0.10.8+ is released.
Expand Down
27 changes: 27 additions & 0 deletions places-tool/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
[package]
name = "places-tool"
version = "0.1.0"
authors = ["Thom Chiovoloni <tchiovoloni@mozilla.com>"]

[dependencies]
lazy_static = "1.1.0"
clap = "2.32.0"
log = "0.4.4"
env_logger = "0.5.12"
failure = "0.1.1"
tempfile = "3.0.3"
dirs = "1.0.3"
rand = "0.5.5"

[dependencies.mentat]
git = "https://github.com/mozilla/mentat.git"
tag = "v0.11.0"
# It seems like we need to use sqlcipher since other projects in this
# workspace are using sqlcipher. Otherwise we get conflicts...
default_features = false
features = ["bundled"]

[dependencies.rusqlite]
version = "0.13"
features = ["bundled", "limits", "functions"]

122 changes: 122 additions & 0 deletions places-tool/src/anonymize.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
use rand::{self, prelude::*};
use std::collections::HashMap;
use rusqlite::{self, Connection, OpenFlags};
use std::path::PathBuf;
use std::fs;
use failure;


/// Memoizing string scrambler.
///
/// Remembers, for every non-empty string it has been asked to anonymize, the
/// replacement it handed out, so repeated inputs always map to the same output.
#[derive(Default, Clone, Debug)]
struct StringAnonymizer {
    /// Original string -> replacement string previously generated for it.
    table: HashMap<String, String>,
}

/// Builds a string of `len` characters, each drawn independently from the
/// `Alphanumeric` distribution using the thread-local random generator.
fn rand_string_of_len(len: usize) -> String {
    use rand::distributions::Alphanumeric;

    let mut generator = thread_rng();
    (0..len).map(|_| generator.sample(Alphanumeric)).collect()
}

impl StringAnonymizer {
    /// Returns the stand-in for `s`, generating and remembering one on first use.
    ///
    /// * The empty string is returned unchanged and is never recorded.
    /// * A string that was seen before gets the replacement recorded for it.
    /// * Otherwise a random string with the same byte length as `s` is
    ///   generated, stored under `s`, and returned.
    ///
    /// A freshly generated candidate is rejected (and another one drawn) while
    /// it is equal to a string already stored as a *key* of the table; the
    /// final attempt is accepted unconditionally.
    ///
    /// NOTE(review): the check looks at the table's keys (original strings),
    /// not at replacements handed out earlier, so two different inputs can
    /// still receive the same replacement -- confirm this is intended.
    fn anonymize(&mut self, s: &str) -> String {
        const MAX_ATTEMPTS: u32 = 10;

        if s.is_empty() {
            return String::new();
        }
        if let Some(known) = self.table.get(s) {
            return known.clone();
        }

        for attempt in 1..=MAX_ATTEMPTS {
            let candidate = rand_string_of_len(s.len());
            // Keep trying on a clash, but take whatever we get on the last attempt.
            let out_of_retries = attempt == MAX_ATTEMPTS;
            if out_of_retries || !self.table.contains_key(&candidate) {
                self.table.insert(s.to_owned(), candidate.clone());
                return candidate;
            }
        }
        unreachable!("Bug in anonymize retry loop");
    }
}

/// Name of a database table together with the names of its columns.
#[derive(Debug, Clone)]
struct TableInfo {
    /// Table name as used in generated SQL.
    name: String,
    /// Column names, in the order reported by a `SELECT *` on the table.
    cols: Vec<String>
}

impl TableInfo {

    /// Wraps `ident` in double quotes, doubling any embedded double quote, so
    /// it is always parsed as a single SQL identifier (even if it is a keyword
    /// or contains spaces or punctuation).
    fn quote_identifier(ident: &str) -> String {
        format!("\"{}\"", ident.replace('"', "\"\""))
    }

    /// Looks up the column names of table `name` by preparing (but not
    /// running) a `SELECT *` against it.
    ///
    /// Returns an error if the statement cannot be prepared.
    fn for_table(name: String, conn: &Connection) -> Result<TableInfo, failure::Error> {
        let stmt = conn.prepare(&format!("SELECT * FROM {}", Self::quote_identifier(&name)))?;
        let cols = stmt.column_names().into_iter().map(str::to_owned).collect();
        Ok(TableInfo { name, cols })
    }

    /// Builds an `UPDATE` statement that rewrites every column of the table
    /// through the SQL function `updater_fn`, i.e. `col = updater_fn(col)`.
    ///
    /// Table and column names are emitted as quoted identifiers; `updater_fn`
    /// is inserted verbatim.
    fn make_update(&self, updater_fn: &str) -> String {
        let sets = self.cols.iter()
            .map(|col| {
                let quoted = Self::quote_identifier(col);
                format!("{} = {}({})", quoted, updater_fn, quoted)
            })
            .collect::<Vec<_>>()
            .join(",\n ");
        format!("UPDATE {}\nSET {}", Self::quote_identifier(&self.name), sets)
    }
}

/// Scrambles every text value stored in the database behind `anon_places`.
///
/// Steps, in order:
/// 1. Registers a one-argument SQL function `anonymize` on the connection.
///    Text arguments are mapped through a `StringAnonymizer` owned by the
///    function; every other value is returned unchanged.
/// 2. Lists all tables from `sqlite_master` whose name does not match
///    `sqlite_%` and records their columns.
/// 3. Runs one `UPDATE` per table that passes every column through
///    `anonymize`.
/// 4. Sets `moz_places.url_hash` to 0 for every row.
///
/// Any failure along the way is returned to the caller immediately.
fn anonymize(anon_places: &Connection) -> Result<(), failure::Error> {
    use rusqlite::types::Value;

    let mut scrambler = StringAnonymizer::default();
    anon_places.create_scalar_function("anonymize", 1, true, move |ctx| {
        match ctx.get::<Value>(0)? {
            Value::Text(text) => Ok(Value::Text(scrambler.anonymize(&text))),
            other => Ok(other),
        }
    })?;

    // Collect the schema in its own scope so the listing statement is gone
    // before the UPDATEs start.
    let schema = {
        let mut table_query = anon_places.prepare("
SELECT name FROM sqlite_master
WHERE type = 'table'
AND name NOT LIKE 'sqlite_%' -- ('sqlite_sequence', 'sqlite_stat1', 'sqlite_master', anyt)
")?;
        let mut rows = table_query.query(&[])?;
        let mut found = Vec::new();
        while let Some(row) = rows.next() {
            let table_name: String = row?.get("name");
            found.push(TableInfo::for_table(table_name, anon_places)?);
        }
        found
    };

    for table in &schema {
        let sql = table.make_update("anonymize");
        debug!("Executing sql:\n{}", sql);
        anon_places.execute(&sql, &[])?;
    }

    debug!("Clearing places url_hash");
    anon_places.execute("UPDATE moz_places SET url_hash = 0", &[])?;

    Ok(())
}

/// Parameters of the "anonymize" command: which database to read and where to
/// write the anonymized copy.
#[derive(Debug, Clone)]
pub struct AnonymizePlaces {
    /// Database file that is copied; it is only used as the copy source.
    pub input_path: PathBuf,
    /// Destination of the copy; this is the file that gets anonymized.
    pub output_path: PathBuf,
}

impl AnonymizePlaces {

pub fn run(self) -> Result<(), failure::Error> {
fs::copy(&self.input_path, &self.output_path)?;
let anon_places = Connection::open_with_flags(&self.output_path,
OpenFlags::SQLITE_OPEN_READ_WRITE)?;
anonymize(&anon_places)?;
Ok(())
}

}



84 changes: 84 additions & 0 deletions places-tool/src/find_db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
use failure;
use dirs;
use std::{process, fs, path::PathBuf};

/// A places database file together with where it was found and how big it is.
#[derive(Clone, Debug, PartialEq)]
pub struct PlacesLocation {
    /// Name of the profile directory containing the database.
    pub profile_name: String,
    /// Path of the database file itself.
    pub path: PathBuf,
    /// Size of the database file in bytes.
    pub db_size: u64,
}

impl PlacesLocation {
    /// Formats `db_size` for display.
    ///
    /// Picks the largest of the units 1024^3 ("Gb"), 1024^2 ("Mb") and 1024
    /// ("Kb") that the size reaches and prints the size in that unit, rounded
    /// to one decimal place and prefixed with `~`. Sizes below 1024 are
    /// printed exactly as `"<n> bytes"`.
    pub fn friendly_db_size(&self) -> String {
        const KIBI: u64 = 1024;
        // Largest unit first, so the first match is the best fit.
        const UNITS: [(u64, &str); 3] = [
            (KIBI * KIBI * KIBI, "Gb"),
            (KIBI * KIBI, "Mb"),
            (KIBI, "Kb"),
        ];

        let bytes = self.db_size;
        match UNITS.iter().find(|&&(threshold, _)| bytes >= threshold) {
            Some(&(threshold, unit)) => {
                let tenths = ((bytes as f64 / threshold as f64) * 10.0).round();
                format!("~{} {}", tenths / 10.0, unit)
            }
            None => format!("{} bytes", bytes),
        }
    }
}

pub fn get_all_places_dbs() -> Result<Vec<PlacesLocation>, failure::Error> {
let mut path = match dirs::home_dir() {
Some(dir) => dir,
None => return Err(format_err!("No home directory found!"))
};
if cfg!(windows) {
path.extend(&["AppData", "Roaming", "Mozilla", "Firefox", "Profiles"]);
} else {
let out = String::from_utf8(
process::Command::new("uname").args(&["-s"]).output()?.stdout)?;
info!("Uname says: {:?}", out);
if out.trim() == "Darwin" {
// ~/Library/Application Support/Firefox/Profiles
path.extend(&["Library", "Application Support", "Firefox", "Profiles"]);
} else {
// I'm not actually sure if this is true for all non-macos unix likes.
path.extend(&[".mozilla", "firefox"]);
}
}
debug!("Using profile path: {:?}", path);
let mut res = fs::read_dir(path)?
.map(|entry_result| {
let entry = entry_result?;
trace!("Considering path {:?}", entry.path());
if !entry.path().is_dir() {
trace!(" Not dir: {:?}", entry.path());
return Ok(None);
}
let mut path = entry.path().to_owned();
let profile_name = path.file_name().unwrap_or_default().to_str().ok_or_else(|| {
warn!(" Path has invalid UTF8: {:?}", path);
format_err!("Path has invalid UTF8: {:?}", path)
})?.into();
path.push("places.sqlite");
if !path.exists() {
return Ok(None);
}
let metadata = fs::metadata(&path)?;
let db_size = metadata.len();
Ok(Some(PlacesLocation {
profile_name,
path,
db_size,
}))
}).filter_map(|result: Result<Option<PlacesLocation>, failure::Error>| {
match result {
Ok(val) => val,
Err(e) => {
debug!("Got error finding profile directory, skipping: {}", e);
None
}
}
}).collect::<Vec<_>>();
res.sort_by(|a, b| b.db_size.cmp(&a.db_size));
Ok(res)
}
14 changes: 14 additions & 0 deletions places-tool/src/initial-data.edn
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

[
{:db/id "container0" :container/name "Default"}
{:db/id "container1" :container/name "Personal"}

{:db/id "device0" :device/name "My very cool computer" :device/type :device.type/desktop}
{:db/id "device1" :device/name "My cool phone" :device/type :device.type/mobile}

; We randomly assign one of these to each visit.
{:context/id 0 :context/device "device0" :context/container "container0"}
{:context/id 1 :context/device "device1" :context/container "container0"}
{:context/id 2 :context/device "device0" :context/container "container1"}
{:context/id 3 :context/device "device1" :context/container "container1"}
]
131 changes: 131 additions & 0 deletions places-tool/src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
extern crate dirs;
#[macro_use]
extern crate failure;
#[macro_use]
extern crate log;
#[macro_use]
extern crate mentat;
extern crate rusqlite;

#[macro_use]
extern crate lazy_static;
extern crate rand;
extern crate env_logger;
extern crate clap;
extern crate tempfile;

use std::path::{Path, PathBuf};
use std::fs;

mod find_db;
mod anonymize;
mod to_mentat;


fn main() -> Result<(), failure::Error> {
let matches = clap::App::new("places-tool")
.subcommand(clap::SubCommand::with_name("to-mentat")
.about("Convert a places database to a mentat database")
.arg(clap::Arg::with_name("OUTPUT")
.index(1)
.help("Path where we should output the mentat db (defaults to ./mentat_places.db)"))
.arg(clap::Arg::with_name("PLACES")
.index(2)
.help("Path to places.sqlite. If not provided, we'll use the largest places.sqlite in your firefox profiles"))
.arg(clap::Arg::with_name("v")
.short("v")
.multiple(true)
.help("Sets the level of verbosity (pass up to 3 times for more verbosity -- e.g. -vvv enables trace logs)"))
.arg(clap::Arg::with_name("force")
.short("f")
.long("force")
.help("Overwrite OUTPUT if it already exists"))
.arg(clap::Arg::with_name("realistic")
.short("r")
.long("realistic")
.help("Insert everything with one transaction per visit. This is a lot slower, \
but is a more realistic workload. It produces databases that are ~30% larger (for me).")))
.subcommand(clap::SubCommand::with_name("anonymize")
.about("Anonymize a places database")
.arg(clap::Arg::with_name("OUTPUT")
.index(1)
.help("Path where we should output the anonymized db (defaults to places_anonymized.sqlite)"))
.arg(clap::Arg::with_name("PLACES")
.index(2)
.help("Path to places.sqlite. If not provided, we'll use the largest places.sqlite in your firefox profiles"))
.arg(clap::Arg::with_name("v")
.short("v")
.multiple(true)
.help("Sets the level of verbosity (pass up to 3 times for more verbosity -- e.g. -vvv enables trace logs)"))
.arg(clap::Arg::with_name("force")
.short("f")
.long("force")
.help("Overwrite OUTPUT if it already exists")))
.get_matches();

let subcommand = matches.subcommand_name().map(|s| s.to_owned()).expect("Must provide subcommand");
let is_anon = subcommand == "anonymize";
let subcmd_matches = matches.subcommand_matches(&subcommand).unwrap();

env_logger::init_from_env(match subcmd_matches.occurrences_of("v") {
0 => env_logger::Env::default().filter_or("RUST_LOG", "warn"),
1 => env_logger::Env::default().filter_or("RUST_LOG", "info"),
2 => env_logger::Env::default().filter_or("RUST_LOG", "debug"),
3 | _ => env_logger::Env::default().filter_or("RUST_LOG", "trace"),
});

let places_db = if let Some(places) = subcmd_matches.value_of("PLACES") {
let meta = fs::metadata(&places)?;
find_db::PlacesLocation {
profile_name: "".into(),
path: fs::canonicalize(places)?,
db_size: meta.len(),
}
} else {
let mut dbs = find_db::get_all_places_dbs()?;
if dbs.len() == 0 {
error!("No dbs found!");
return Err(format_err!("No dbs found!"));
}
for p in &dbs {
debug!("Found: profile {:?} with a {} places.sqlite", p.profile_name, p.friendly_db_size())
}
info!("Using profile {:?}", dbs[0].profile_name);
dbs.into_iter().next().unwrap()
};

let out_db_path = subcmd_matches.value_of("OUTPUT").unwrap_or_else(|| {
if is_anon {
"./places_anonymized.sqlite"
} else {
"./mentat_places.db"
}
}).to_owned();

if Path::new(&out_db_path).exists() {
if subcmd_matches.is_present("force") {
info!("Deleting previous `{}` because -f was passed", out_db_path);
fs::remove_file(&out_db_path)?;
} else {
error!("{} already exists but `-f` argument was not provided", out_db_path);
return Err(format_err!("Output path already exists"));
}
}

if is_anon {
let cmd = anonymize::AnonymizePlaces {
input_path: places_db.path,
output_path: PathBuf::from(out_db_path)
};
cmd.run()?;
} else {
let cmd = to_mentat::PlacesToMentat {
mentat_db_path: PathBuf::from(out_db_path),
places_db_path: places_db.path,
realistic: subcmd_matches.is_present("realistic"),
};
cmd.run()?;
}

Ok(())
}
Loading