From 90cec75cde1fed0c389712a075b010b99e79dae0 Mon Sep 17 00:00:00 2001 From: Jonathan S <1924550@stud.hs-mannheim.de> Date: Sat, 29 Jun 2024 11:29:35 +0200 Subject: [PATCH] We did this all in one go --- .gitignore | 1 + README.md | 107 ++ cqlsh/cqlshrc | 10 + docker-compose.yml | 135 ++ etc/_copy_cass1/README.txt | 13 + etc/_copy_cass1/cassandra-env.sh | 307 ++++ etc/_copy_cass1/cassandra-jaas.config | 4 + etc/_copy_cass1/cassandra-rackdc.properties | 39 + etc/_copy_cass1/cassandra-topology.properties | 41 + etc/_copy_cass1/cassandra.yaml | 1419 +++++++++++++++++ .../commitlog_archiving.properties | 48 + etc/_copy_cass1/cqlshrc.sample | 238 +++ etc/_copy_cass1/hotspot_compiler | 32 + etc/_copy_cass1/jvm-clients.options | 10 + etc/_copy_cass1/jvm-server.options | 188 +++ etc/_copy_cass1/jvm11-clients.options | 29 + etc/_copy_cass1/jvm11-server.options | 103 ++ etc/_copy_cass1/jvm8-clients.options | 9 + etc/_copy_cass1/jvm8-server.options | 76 + etc/_copy_cass1/logback-tools.xml | 34 + etc/_copy_cass1/logback.xml | 124 ++ .../metrics-reporter-config-sample.yaml | 41 + etc/_copy_cass1/triggers/README.txt | 1 + etc/cass1/.gitignore | 11 + etc/cass1/cassandra-env.sh | 307 ++++ etc/cass1/cassandra-rackdc.properties | 39 + etc/cass1/cassandra.yaml | 1419 +++++++++++++++++ etc/cass1/commitlog_archiving.properties | 48 + etc/cass1/jvm-clients.options | 10 + etc/cass1/jvm-server.options | 188 +++ etc/cass1/jvm11-clients.options | 29 + etc/cass1/jvm11-server.options | 103 ++ etc/cass1/jvm8-clients.options | 9 + etc/cass1/jvm8-server.options | 76 + etc/cass1/logback.xml | 124 ++ etc/cass2/.gitignore | 11 + etc/cass2/cassandra-env.sh | 307 ++++ etc/cass2/cassandra-rackdc.properties | 39 + etc/cass2/cassandra.yaml | 1419 +++++++++++++++++ etc/cass2/commitlog_archiving.properties | 48 + etc/cass2/jvm-clients.options | 10 + etc/cass2/jvm-server.options | 188 +++ etc/cass2/jvm11-clients.options | 29 + etc/cass2/jvm11-server.options | 103 ++ etc/cass2/jvm8-clients.options | 9 + etc/cass2/jvm8-server.options | 76 + etc/cass2/logback.xml | 124 ++ etc/cass3/.gitignore | 11 + etc/cass3/cassandra-env.sh | 307 ++++ etc/cass3/cassandra-rackdc.properties | 39 + etc/cass3/cassandra.yaml | 1419 +++++++++++++++++ etc/cass3/commitlog_archiving.properties | 48 + etc/cass3/jvm-clients.options | 10 + etc/cass3/jvm-server.options | 188 +++ etc/cass3/jvm11-clients.options | 29 + etc/cass3/jvm11-server.options | 103 ++ etc/cass3/jvm8-clients.options | 9 + etc/cass3/jvm8-server.options | 76 + etc/cass3/logback.xml | 124 ++ etc/cass_startup_client/.gitignore | 11 + etc/cass_startup_client/cassandra-env.sh | 307 ++++ .../cassandra-rackdc.properties | 39 + etc/cass_startup_client/cassandra.yaml | 1419 +++++++++++++++++ .../commitlog_archiving.properties | 48 + etc/cass_startup_client/jvm-clients.options | 10 + etc/cass_startup_client/jvm-server.options | 188 +++ etc/cass_startup_client/jvm11-clients.options | 29 + etc/cass_startup_client/jvm11-server.options | 103 ++ etc/cass_startup_client/jvm8-clients.options | 9 + etc/cass_startup_client/jvm8-server.options | 76 + etc/cass_startup_client/logback.xml | 124 ++ startup/queries/Dockerfile | 4 + startup/queries/exercise2_3.py | 24 + startup/queries/twitter_queries.cql | 44 + startup/setup/cleaning.py | 83 + startup/setup/setup_db.sh | 11 + startup/setup/setup_db_1.cql | 25 + startup/setup/setup_db_2.cql | 37 + startup/setup/setup_likedFrom.sh | 5 + startup/setup/setup_views.cql | 28 + 80 files changed, 12722 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 cqlsh/cqlshrc create mode 100644 docker-compose.yml create mode 100644 etc/_copy_cass1/README.txt create mode 100644 etc/_copy_cass1/cassandra-env.sh create mode 100644 etc/_copy_cass1/cassandra-jaas.config create mode 100644 etc/_copy_cass1/cassandra-rackdc.properties create mode 100644 etc/_copy_cass1/cassandra-topology.properties create mode 100644 etc/_copy_cass1/cassandra.yaml create mode 100644 etc/_copy_cass1/commitlog_archiving.properties create mode 100644 etc/_copy_cass1/cqlshrc.sample create mode 100644 etc/_copy_cass1/hotspot_compiler create mode 100644 etc/_copy_cass1/jvm-clients.options create mode 100644 etc/_copy_cass1/jvm-server.options create mode 100644 etc/_copy_cass1/jvm11-clients.options create mode 100644 etc/_copy_cass1/jvm11-server.options create mode 100644 etc/_copy_cass1/jvm8-clients.options create mode 100644 etc/_copy_cass1/jvm8-server.options create mode 100644 etc/_copy_cass1/logback-tools.xml create mode 100644 etc/_copy_cass1/logback.xml create mode 100644 etc/_copy_cass1/metrics-reporter-config-sample.yaml create mode 100644 etc/_copy_cass1/triggers/README.txt create mode 100644 etc/cass1/.gitignore create mode 100644 etc/cass1/cassandra-env.sh create mode 100644 etc/cass1/cassandra-rackdc.properties create mode 100644 etc/cass1/cassandra.yaml create mode 100644 etc/cass1/commitlog_archiving.properties create mode 100644 etc/cass1/jvm-clients.options create mode 100644 etc/cass1/jvm-server.options create mode 100644 etc/cass1/jvm11-clients.options create mode 100644 etc/cass1/jvm11-server.options create mode 100644 etc/cass1/jvm8-clients.options create mode 100644 etc/cass1/jvm8-server.options create mode 100644 etc/cass1/logback.xml create mode 100644 etc/cass2/.gitignore create mode 100644 etc/cass2/cassandra-env.sh create mode 100644 etc/cass2/cassandra-rackdc.properties create mode 100644 etc/cass2/cassandra.yaml create mode 100644 etc/cass2/commitlog_archiving.properties create mode 100644 etc/cass2/jvm-clients.options create mode 100644 etc/cass2/jvm-server.options create mode 100644 etc/cass2/jvm11-clients.options create mode 100644 etc/cass2/jvm11-server.options create mode 100644 etc/cass2/jvm8-clients.options create mode 100644 etc/cass2/jvm8-server.options create mode 100644 etc/cass2/logback.xml create mode 100644 etc/cass3/.gitignore create mode 100644 etc/cass3/cassandra-env.sh create mode 100644 etc/cass3/cassandra-rackdc.properties create mode 100644 etc/cass3/cassandra.yaml create mode 100644 etc/cass3/commitlog_archiving.properties create mode 100644 etc/cass3/jvm-clients.options create mode 100644 etc/cass3/jvm-server.options create mode 100644 etc/cass3/jvm11-clients.options create mode 100644 etc/cass3/jvm11-server.options create mode 100644 etc/cass3/jvm8-clients.options create mode 100644 etc/cass3/jvm8-server.options create mode 100644 etc/cass3/logback.xml create mode 100644 etc/cass_startup_client/.gitignore create mode 100644 etc/cass_startup_client/cassandra-env.sh create mode 100644 etc/cass_startup_client/cassandra-rackdc.properties create mode 100644 etc/cass_startup_client/cassandra.yaml create mode 100644 etc/cass_startup_client/commitlog_archiving.properties create mode 100644 etc/cass_startup_client/jvm-clients.options create mode 100644 etc/cass_startup_client/jvm-server.options create mode 100644 etc/cass_startup_client/jvm11-clients.options create mode 100644 etc/cass_startup_client/jvm11-server.options create mode 100644 etc/cass_startup_client/jvm8-clients.options create mode 100644 etc/cass_startup_client/jvm8-server.options create mode 100644 etc/cass_startup_client/logback.xml create mode 100644 startup/queries/Dockerfile create mode 100644 startup/queries/exercise2_3.py create mode 100644 startup/queries/twitter_queries.cql create mode 100644 startup/setup/cleaning.py create mode 100644 startup/setup/setup_db.sh create mode 100644 startup/setup/setup_db_1.cql create mode 100644 startup/setup/setup_db_2.cql create mode 100644 startup/setup/setup_likedFrom.sh create mode 100644 startup/setup/setup_views.cql diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1269488 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +data diff --git a/README.md b/README.md new file mode 100644 index 0000000..c192a0f --- /dev/null +++ b/README.md @@ -0,0 +1,107 @@ +## Cassandra + +Command can be run on the Cassandra Web at `localhost:3000` or in the docker container with: + + sudo docker exec -it cass1 cqlsh + +Note the `MATERIALZIED VIEWS` and `CUSTOM INDEX` are already created in the [startup script](https://github.com/Miracle-Fruit/kikeriki/blob/main/cassandra/startup/setup/setup_db.sh) and don't needed to be run again. + +## Queries + +All queries can also be found [here](https://github.com/Miracle-Fruit/kikeriki/tree/main/cassandra/startup/queries). + +1. Listing all the posts made by an account. + + SELECT content FROM twitter.tweets where author_id = 233248636; + +![result_ex1](../query_results/ex1.png) + +2. Find the 100 accounts with the most followers +``` python +# 2. Find the 100 accounts with the most followers +rows = session.execute('SELECT user_id,follower_len FROM twitter.most_follows;') +sorted_rows = dict(sorted(dict(rows).items(), key=lambda item: item[1])) +most_follows = list(sorted_rows.keys())[-100:] +print("Top 100 most followed accounts:",most_follows) +``` + + Top 100 most followed accounts: [817268, 173732041, 14506809, 158804228, 358775055, 45416789, 302282272, 65913144, 261001122, 14246001, 7702232, 101204352, 280365428, 15439395, 26929220, 46537966, 32774989, 2367911, 7429892, 88097807, 274153775, 41147062, 127973392, 10350, 12127832, 18666844, 279787626, 14511951, 116036694, 225784456, 270449528, 63796828, 22784458, 364917755, 15693493, 17346342, 28933226, 7872262, 14180231, 100581193, 88323281, 131029775, 25952851, 14692604, 21681252, 309366491, 25376226, 22705686, 188108667, 18715024, 3829151, 41172837, 92319025, 24081780, 15102849, 83943787, 15680204, 7846, 184097849, 7377812, 204317520, 24641194, 14691709, 112939321, 7081402, 153226312, 14269220, 94414805, 108811740, 1065921, 221829166, 116952434, 15222083, 6608332, 17759701, 16098603, 7860742, 20471349, 6519522, 19040580, 14536491, 197504076, 12611642, 36198161, 3840, 22841103, 440963134, 20273398, 17092592, 13348, 17093617, 22679419, 208132323, 18776017, 15846407, 18581803, 5442012, 813286, 3359851, 59804598] + +3. Finding the 100 accounts that follow the most of the accounts found in 2). +```python +followed_accs = session.execute(f'SELECT follower_id FROM twitter.follower_relation WHERE user_id IN {tuple(most_follows)};') +_list = list() +for row in followed_accs: + _list.append(row[0]) + +followed_top_100 = Counter(_list).most_common(100) +print("100 accounts that follow the most of the accounts found in 2)",followed_top_100) +``` + + 100 accounts that follow the most of the accounts found in 2) [(3359851, 47), (7860742, 47), (15913, 45), (7861312, 45), (16098603, 43), (18776017, 41), (10350, 39), (48485771, 39), (22679419, 38), (3443591, 37), (18581803, 37), (18927441, 37), (26281970, 37), (7872262, 36), (24742040, 36), (5442012, 36), (10671602, 35), (11928542, 35), (14922225, 35), (16453996, 35), (21681252, 35), (14589257, 34), (15853668, 34), (9451052, 34), (65913144, 34), (14269220, 34), (15234657, 34), (17092592, 34), (59804598, 34), (87764480, 34), (40981798, 34), (16475194, 34), (16464746, 33), (93905958, 33), (20152005, 33), (42361118, 33), (36629388, 33), (18666844, 32), (19413393, 32), (12127832, 32), (14180231, 32), (14983833, 32), (25026165, 32), (18742444, 32), (29758446, 31), (20880546, 31), (116952434, 31), (30207757, 31), (21195122, 31), (43003845, 31), (22784458, 30), (19040580, 30), (14691709, 30), (24004172, 30), (1065921, 30), (26280712, 30), (43170475, 30), (22462180, 30), (7846, 29), (1183041, 29), (3040621, 29), (7377812, 29), (34428380, 29), (20273398, 28), (14230524, 28), (15222083, 28), (24641194, 28), (83943787, 28), (36198161, 28), (14536491, 28), (813286, 27), (12611642, 27), (29514951, 27), (31353077, 27), (18996905, 27), (14471778, 27), (15838599, 27), (16112634, 27), (17526132, 27), (17759701, 27), (31331740, 27), (25952851, 27), (84043660, 27), (9431932, 27), (20397258, 27), (127973392, 27), (16674726, 27), (116036694, 27), (972651, 26), (13687132, 26), (4068821, 26), (7702232, 26), (6080022, 26), (17224642, 26), (20935355, 26), (24081780, 26), (26033920, 26), (28933226, 26), (43933017, 26), (101633415, 26)] + +4. Listing the information for the personal home page of any account (best try with the accounts found in 2); the start page should contain the following (implement as separate queries): + * the number of followers && the number of followed accounts + + + SELECT follower_len, follows_len FROM twitter.user_stats WHERE user_id = 233248636; //cheack user_id can be changed + + ![result_ex4_1](../query_results/ex4_1.png) + + + * either the 25 newest or the 25 most popular posts of the followed accounts (via DB query) + + 25 newest + + CREATE MATERIALIZED VIEW twitter.start_view_new AS + SELECT user_id_x,follower_id,number_of_likes,number_of_shares,date_time,name,author,content,id FROM twitter.user + WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND date_time IS NOT NULL AND id IS NOT NULL + PRIMARY KEY ((user_id_x),date_time,follower_id,id); + + SELECT * FROM twitter.start_view_new WHERE user_id_x = 172883064 ORDER BY date_time DESC LIMIT 25; + + ![result_ex4_2](../query_results/ex4_2_date.png) + + + 25 most popular + + CREATE MATERIALIZED VIEW twitter.start_view_like AS + SELECT user_id_x,follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id FROM twitter.user + WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL + PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); + + SELECT * FROM twitter.start_view_like WHERE user_id_x = 172883064 ORDER BY number_of_likes DESC LIMIT 25; + + ![result_ex4_2](../query_results/ex4_2_likes.png) + +5. Caching of the posts for the home page (cf. 4) requires a so-called fan-out in the cache of each follower when writing a new post + + CREATE MATERIALIZED VIEW twitter.start_view_user1 AS + SELECT follower_id,number_of_likes,date_time,author,name,content,id FROM twitter.user + WHERE user_id_x IS NOT NULL AND user_id_x=172883064 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL + PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); + // # order by need partion key in WHERE + SELECT * from twitter.start_view_taylor WHERE user_id_x=172883064 ORDER BY number_of_likes DESC LIMIT 25; + // INSERT new tweet + INSERT INTO twitter.user + (user_id_x,follower_id,name,author ,content ,country ,date_time ,id ,language ,latitude ,longitude ,number_of_likes ,number_of_shares ,user_id_y) + VALUES + (172883064, 233248636, 'NoName','taylorswift12', 'Hallo there BDEA','DE', dateof(now()), 'NoID', 'de', 48, 48, 10000000, 0, 0); + + INSERT INTO twitter.tweets(author, content, country, date_time, id, language, latitude, longitude, number_of_likes, number_of_shares ,author_id) + VALUES + ('taylorswift12', 'Hallo there BDEA','DE', dateof(now()), 'NoID', 'de', 48, 48, 10000000, 0, 0 '233248636'); + + ![result_ex4_2](../query_results/ex4_2_likes.png) + + +6. List of the 25 most popular posts that contain a given word (if possible also with AND linking several words) + + // To order by the 25 most number_of_like is at our knowledge not possible with the current dataschema + CREATE CUSTOM INDEX search_in ON twitter.tweets (content) USING 'org.apache.cassandra.index.sasi.SASIIndex' + WITH OPTIONS = { 'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', + 'case_sensitive': 'false' }; + SELECT * from twitter.tweets WHERE content LIKE '%world%' limit 25; + + ![result_ex4_2](../query_results/ex6.png) + diff --git a/cqlsh/cqlshrc b/cqlsh/cqlshrc new file mode 100644 index 0000000..8ceefa6 --- /dev/null +++ b/cqlsh/cqlshrc @@ -0,0 +1,10 @@ +;; Setting for cqlsh can be found https://docs.datastax.com/en/dse/5.1/cql/cql/cql_reference/cqlsh_commands/cqlshCqlshrc.html +;; Sample cqlshrc file on GitHub: https://github.com/apache/cassandra/blob/067f4c74612698881feec09039d71d12a5936418/conf/cqlshrc.sample +[ui] +;; Display timezone +timezone = Etc/UTC +;; Used for displaying timestamps (and reading them with COPY) +time_format = %d/%m/%Y %H:%M +[copy] +ESCAPE = \ +QUOTE = " diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..fbe8d86 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,135 @@ +# version: "3.9" # optional since v1.27.0 + +networks: + spaceandtime: + ipam: + config: + - subnet: 172.20.0.0/24 # static ip management required for cassandra web, as this does not work with docker dns + +services: + cass1: + image: cassandra:4.0.4 # latest lts version as of 31.05.2022 + container_name: cass1 + hostname: cass1 + healthcheck: + test: ["CMD", "cqlsh", "-e", "describe keyspaces" ] + interval: 10s + timeout: 10s + start_period: 50s + retries: 10 + networks: + spaceandtime: + ipv4_address: 172.20.0.6 + ports: + - "9042:9042" + volumes: + - ./cqlsh:/root/.cassandra + - ./startup:/tmp/startup + - ./data/cass1:/var/lib/cassandra + - ./etc/cass1:/etc/cassandra # currently not needed as this is configured with the below env variables + environment: &environment + MAX_HEAP_SIZE: 1024M + HEAP_NEWSIZE: 1024M + CASSANDRA_SEEDS: "cass1,cass2" + CASSANDRA_CLUSTER_NAME: SolarSystem + CASSANDRA_DC: Mars + CASSANDRA_RACK: West + CASSANDRA_ENDPOINT_SNITCH: GossipingPropertyFileSnitch + CASSANDRA_NUM_TOKENS: 128 + restart: always + + cass2: + image: cassandra:4.0.4 # latest lts version as of 31.05.2022 + container_name: cass2 + hostname: cass2 + healthcheck: + test: ["CMD", "cqlsh", "-e", "describe keyspaces" ] + interval: 10s + timeout: 10s + start_period: 50s + retries: 10 + networks: + spaceandtime: + ipv4_address: 172.20.0.7 + ports: + - "9043:9042" + volumes: + - ./cqlsh:/root/.cassandra + - ./startup:/tmp/startup + - ./data/cass2:/var/lib/cassandra + - ./etc/cass2:/etc/cassandra + environment: *environment + depends_on: + cass1: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + restart: always + + cass3: + image: cassandra:4.0.4 # latest lts version as of 31.05.2022 + container_name: cass3 + hostname: cass3 + healthcheck: + test: ["CMD", "cqlsh", "-e", "describe keyspaces" ] + interval: 10s + timeout: 10s + start_period: 50s + retries: 10 + networks: + spaceandtime: + ipv4_address: 172.20.0.8 + ports: + - "9044:9042" + volumes: + - ./cqlsh:/root/.cassandra + - ./startup:/tmp/startup + - ./data/cass3:/var/lib/cassandra + - ./etc/cass3:/etc/cassandra + environment: *environment + depends_on: + cass2: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + restart: always + + cass_startup_client: + image: cassandra:4.0.4 # latest lts version as of 31.05.2022 + container_name: cass_startup_client + hostname: cass_startup_client + networks: + spaceandtime: + ipv4_address: 172.20.0.11 + volumes: + - ./cqlsh:/root/.cassandra + - ./startup:/tmp/startup + - ./data/cass_startup_client:/var/lib/cassandra + - ./etc/cass_startup_client:/etc/cassandra + environment: *environment + depends_on: + cass1: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + cass2: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + cass3: + condition: service_healthy + command: bash -c "/tmp/startup/setup/setup_db.sh && /tmp/startup/setup/setup_likedFrom.sh" + + cassweb: + image: ruby:2.4.1 # latest stable version as of 31.05.2022 + container_name: cassweb + hostname: cassweb + command: bash -c "gem install cassandra-web && cassandra-web --hosts '172.20.0.6, 172.20.0.7, 172.20.0.8' --port '9042' --username 'cassandra' --password 'cassandra'" + networks: + spaceandtime: + ipv4_address: 172.20.0.9 + ports: + - "3000:3000" + depends_on: + cass1: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + cass2: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + cass3: + condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below + cass_startup_client: + condition: service_started + restart: always + diff --git a/etc/_copy_cass1/README.txt b/etc/_copy_cass1/README.txt new file mode 100644 index 0000000..e44d4a3 --- /dev/null +++ b/etc/_copy_cass1/README.txt @@ -0,0 +1,13 @@ +Required configuration files +============================ + +cassandra.yaml: main Cassandra configuration file +logback.xml: logback configuration file for Cassandra server + + +Optional configuration files +============================ + +cassandra-topology.properties: used by PropertyFileSnitch + + diff --git a/etc/_copy_cass1/cassandra-env.sh b/etc/_copy_cass1/cassandra-env.sh new file mode 100644 index 0000000..2e3c8c9 --- /dev/null +++ b/etc/_copy_cass1/cassandra-env.sh @@ -0,0 +1,307 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +calculate_heap_sizes() +{ + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi +} + +# Sets the path where logback and GC logs are written. +if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" +fi + +#GC log path has to be defined here because it needs to access CASSANDRA_HOME +if [ $JAVA_VERSION -ge 11 ] ; then + # See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax + # The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M + echo "$JVM_OPTS" | grep -qe "-[X]log:gc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760" + fi +else + # Java 8 + echo "$JVM_OPTS" | grep -qe "-[X]loggc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + fi +fi + +# Check what parameters were defined on jvm-server.options file to avoid conflicts +echo $JVM_OPTS | grep -q Xmn +DEFINED_XMN=$? +echo $JVM_OPTS | grep -q Xmx +DEFINED_XMX=$? +echo $JVM_OPTS | grep -q Xms +DEFINED_XMS=$? +echo $JVM_OPTS | grep -q UseConcMarkSweepGC +USING_CMS=$? +echo $JVM_OPTS | grep -q +UseG1GC +USING_G1=$? + +# Override these to set the amount of memory to allocate to the JVM at +# start-up. For production use you may wish to adjust this for your +# environment. MAX_HEAP_SIZE is the total amount of memory dedicated +# to the Java heap. HEAP_NEWSIZE refers to the size of the young +# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set +# or not (if you set one, set the other). +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause +# times. If in doubt, and if you do not particularly want to tweak, go with +# 100 MB per physical CPU core. + +#MAX_HEAP_SIZE="4G" +#HEAP_NEWSIZE="800M" + +# Set this to control the amount of arenas per-thread in glibc +#export MALLOC_ARENA_MAX=4 + +# only calculate the size if it's not set manually +if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes +elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 +fi + +if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 +fi + +# We only set -Xms and -Xmx if they were not defined on jvm-server.options file +# If defined, both Xmx and Xms should be defined together. +if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" +elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file." + exit 1 +fi + +# We only set -Xmn flag if it was not defined in jvm-server.options file +# and if the CMS GC is being used +# If defined, both Xmn and Xmx should be defined together. +if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file." + exit 1 +elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" +fi + +# We fail to start if -Xmn is used with G1 GC is being used +# See comments for -Xmn in jvm-server.options +if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then + echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details." + exit 1 +fi + +if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" +fi + +# provides hints to the JIT compiler +JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + +# add the jamm javaagent +JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar" + +# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR +if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" +fi + +# stop the jvm on OutOfMemoryError as it can result in some data corruption +# uncomment the preferred option +# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 +# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words +# on white spaces without taking quotes into account +# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" +# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" +JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + +# print an heap histogram on OutOfMemoryError +# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + +# jmx: metrics and administration interface +# +# add this if you're having trouble connecting: +# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" +# +# see +# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole +# for more on configuring JMX through firewalls, etc. (Short version: +# get it working with no firewall first.) +# +# Cassandra ships with JMX accessible *only* from localhost. +# To enable remote JMX connections, uncomment lines below +# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity +# +if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes +fi + +# Specifies the default port over which Cassandra will be available for +# JMX connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +JMX_PORT="7199" + +if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" +else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + # if ssl is enabled the same port cannot be used for both jmx and rmi so either + # pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins) + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + + # turn on JMX authentication. See below for further options + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true" + + # jmx ssl options + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=" +fi + +# jmx authentication and authorization options. By default, auth is only +# activated for remote connections but they can also be enabled for local only JMX +## Basic file based authn & authz +JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" +#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" +## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. +## JAAS login modules can be used for authentication by uncommenting these two properties. +## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - +## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration +## file cassandra-jaas.config +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" +#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + +## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, +## uncomment this to use it. Requires one of the two authentication options to be enabled +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + +# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ +# directory. +# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx +# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines +# to control its listen address and port. +#MX4J_ADDRESS="127.0.0.1" +#MX4J_PORT="8081" + +# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 +# for SIGAR we have to set the java.library.path +# to the location of the native libraries. +JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + +if [ "x$MX4J_ADDRESS" != "x" ]; then + if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + else + JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS" + fi +fi +if [ "x$MX4J_PORT" != "x" ]; then + if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + else + JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT" + fi +fi + +JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" + diff --git a/etc/_copy_cass1/cassandra-jaas.config b/etc/_copy_cass1/cassandra-jaas.config new file mode 100644 index 0000000..f3a9bf7 --- /dev/null +++ b/etc/_copy_cass1/cassandra-jaas.config @@ -0,0 +1,4 @@ +// Delegates authentication to Cassandra's configured IAuthenticator +CassandraLogin { + org.apache.cassandra.auth.CassandraLoginModule REQUIRED; +}; diff --git a/etc/_copy_cass1/cassandra-rackdc.properties b/etc/_copy_cass1/cassandra-rackdc.properties new file mode 100644 index 0000000..84716f5 --- /dev/null +++ b/etc/_copy_cass1/cassandra-rackdc.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These properties are used with GossipingPropertyFileSnitch and will +# indicate the rack and dc for this node +dc= Mars +rack= West + +# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch +# to append a string to the EC2 region name. +#dc_suffix= + +# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does. +# prefer_local=true + +# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch. +# Options are: +# legacy : datacenter name is the part of the availability zone name preceding the last "-" +# when the zone ends in -1 and includes the number if not -1. Rack is the portion of +# the availability zone name following the last "-". +# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b; +# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER +# standard : Default value. datacenter name is the standard AWS region name, including the number. +# rack name is the region plus the availability zone letter. +# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b; +# ec2_naming_scheme=standard diff --git a/etc/_copy_cass1/cassandra-topology.properties b/etc/_copy_cass1/cassandra-topology.properties new file mode 100644 index 0000000..d1423d4 --- /dev/null +++ b/etc/_copy_cass1/cassandra-topology.properties @@ -0,0 +1,41 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Cassandra Node IP=Data Center:Rack +192.168.1.100=DC1:RAC1 +192.168.2.200=DC2:RAC2 + +10.0.0.10=DC1:RAC1 +10.0.0.11=DC1:RAC1 +10.0.0.12=DC1:RAC2 + +10.20.114.10=DC2:RAC1 +10.20.114.11=DC2:RAC1 + +10.21.119.13=DC3:RAC1 +10.21.119.10=DC3:RAC1 + +10.0.0.13=DC1:RAC2 +10.21.119.14=DC3:RAC2 +10.20.114.15=DC2:RAC2 + +# default for unknown nodes +default=DC1:r1 + +# Native IPv6 is supported, however you must escape the colon in the IPv6 Address +# Also be sure to comment out JVM_OPTS="$JVM_OPTS -Djava.net.preferIPv4Stack=true" +# in cassandra-env.sh +fe80\:0\:0\:0\:202\:b3ff\:fe1e\:8329=DC1:RAC3 diff --git a/etc/_copy_cass1/cassandra.yaml b/etc/_copy_cass1/cassandra.yaml new file mode 100644 index 0000000..4baf617 --- /dev/null +++ b/etc/_copy_cass1/cassandra.yaml @@ -0,0 +1,1419 @@ + +# Cassandra storage config YAML + +# NOTE: +# See https://cassandra.apache.org/doc/latest/configuration/ for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: SolarSystem + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for +# best practice information about num_tokens. +# +num_tokens: 128 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replica factor. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. + +# Replica factor is determined via the replication strategy used by the specified +# keyspace. +# allocate_tokens_for_keyspace: KEYSPACE + +# Replica factor is explicitly set, regardless of keyspace or datacenter. +# This is the replica factor within the datacenter, like NTS. +allocate_tokens_for_local_replication_factor: 3 + +# initial_token allows you to specify tokens manually. While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) +hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: AllowAllAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: AllowAllAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. +# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Network authorization backend, implementing INetworkAuthorizer; used to restrict user +# access to certain DCs +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer, +# CassandraNetworkAuthorizer}. +# +# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization. +# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +network_authorizer: AllowAllNetworkAuthorizer + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. The partitioner can NOT be +# changed without reloading all data. If you are adding nodes or upgrading, +# you should set this to the same partitioner that you are currently using. +# +# The default partitioner is the Murmur3Partitioner. Older partitioners +# such as the RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner have been included for backward compatibility only. +# For new clusters, you should NOT change this value. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. If multiple +# directories are specified, Cassandra will spread data evenly across +# them by partitioning the token ranges. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +# data_file_directories: +# - /var/lib/cassandra/data + +# Directory were Cassandra should store the data of the local system keyspaces. +# By default Cassandra will store the data of the local system keyspaces in the first of the data directories specified +# by data_file_directories. +# This approach ensures that if one of the other disks is lost Cassandra can continue to operate. For extra security +# this setting allows to store those data on a different directory that provides redundancy. +# local_system_data_file_directory: + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +# commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. If not set, the default directory is +# $CASSANDRA_HOME/data/cdc_raw. +# cdc_raw_directory: /var/lib/cassandra/cdc_raw + +# Policy for data disk failures: +# +# die +# shut down gossip and client transports and kill the JVM for any fs errors or +# single-sstable errors, so the node can be replaced. +# +# stop_paranoid +# shut down gossip and client transports even for single-sstable errors, +# kill the JVM for errors during startup. +# +# stop +# shut down gossip and client transports, leaving the node effectively dead, but +# can still be inspected via JMX, kill the JVM for errors during startup. +# +# best_effort +# stop using the failed disk and respond to requests based on +# remaining available sstables. This means you WILL see obsolete +# data at CL.ONE! +# +# ignore +# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra +disk_failure_policy: stop + +# Policy for commit disk failures: +# +# die +# shut down the node and kill the JVM, so the node can be replaced. +# +# stop +# shut down the node, leaving the node effectively dead, but +# can still be inspected via JMX. +# +# stop_commit +# shutdown the commit log, letting writes collect but +# continuing to service reads, as in pre-2.0.5 Cassandra +# +# ignore +# ignore fatal errors and let the batches fail +commit_failure_policy: stop + +# Maximum size of the native protocol prepared statement cache +# +# Valid values are either "auto" (omitting the value) or a value greater 0. +# +# Note that specifying a too large value will result in long running GCs and possbily +# out-of-memory errors. Keep the value at a small fraction of the heap. +# +# If you constantly see "prepared statements discarded in the last minute because +# cache limit reached" messages, the first step is to investigate the root cause +# of these messages and check whether prepared statements are used correctly - +# i.e. use bind markers for variable parts. +# +# Do only change the default value, if you really have more prepared statements than +# fit in the cache. In most cases it is not neccessary to change this value. +# Constantly re-preparing statements is a performance penalty. +# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +prepared_statements_cache_size_mb: + +# Maximum size of the key cache in memory. +# +# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the +# minimum, sometimes more. The key cache is fairly tiny for the amount of +# time it saves, so it's worthwhile to use it at large numbers. +# The row cache saves even more time, but must contain the entire row, +# so it is extremely space-intensive. It's best to only use the +# row cache if you have hot rows or static rows. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. +key_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the key cache. Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 14400 or 4 hours. +key_cache_save_period: 14400 + +# Number of keys from the key cache to save +# Disabled by default, meaning all keys are going to be saved +# key_cache_keys_to_save: 100 + +# Row cache implementation class name. Available implementations: +# +# org.apache.cassandra.cache.OHCProvider +# Fully off-heap row cache implementation (default). +# +# org.apache.cassandra.cache.SerializingCacheProvider +# This is the row cache implementation availabile +# in previous releases of Cassandra. +# row_cache_class_name: org.apache.cassandra.cache.OHCProvider + +# Maximum size of the row cache in memory. +# Please note that OHC cache implementation requires some additional off-heap memory to manage +# the map structures and some in-flight memory during operations before/after cache entries can be +# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. +# Do not specify more memory that the system can afford in the worst usual situation and leave some +# headroom for OS block level cache. Do never allow your system to swap. +# +# Default value is 0, to disable row caching. +row_cache_size_in_mb: 0 + +# Duration in seconds after which Cassandra should save the row cache. +# Caches are saved to saved_caches_directory as specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 0 to disable saving the row cache. +row_cache_save_period: 0 + +# Number of keys from the row cache to save. +# Specify 0 (which is the default), meaning all keys are going to be saved +# row_cache_keys_to_save: 100 + +# Maximum size of the counter cache in memory. +# +# Counter cache helps to reduce counter locks' contention for hot counter cells. +# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before +# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration +# of the lock hold, helping with hot counter cell updates, but will not allow skipping +# the read entirely. Only the local (clock, count) tuple of a counter cell is kept +# in memory, not the whole counter, so it's relatively cheap. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. +# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. +counter_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the counter cache (keys only). Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Default is 7200 or 2 hours. +counter_cache_save_period: 7200 + +# Number of keys from the counter cache to save +# Disabled by default, meaning all keys are going to be saved +# counter_cache_keys_to_save: 100 + +# saved caches +# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. +# saved_caches_directory: /var/lib/cassandra/saved_caches + +# Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting +# the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup +# while still having the cache during runtime. +# cache_load_timeout_seconds: 30 + +# commitlog_sync may be either "periodic", "group", or "batch." +# +# When in batch mode, Cassandra won't ack writes until the commit log +# has been flushed to disk. Each incoming write will trigger the flush task. +# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had +# almost no value, and is being removed. +# +# commitlog_sync_batch_window_in_ms: 2 +# +# group mode is similar to batch mode, where Cassandra will not ack writes +# until the commit log has been flushed to disk. The difference is group +# mode will wait up to commitlog_sync_group_window_in_ms between flushes. +# +# commitlog_sync_group_window_in_ms: 1000 +# +# the default option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync: periodic +commitlog_sync_period_in_ms: 10000 + +# When in periodic commitlog mode, the number of milliseconds to block writes +# while waiting for a slow disk flush to complete. +# periodic_commitlog_sync_lag_block_in_ms: + +# The size of the individual commitlog file segments. A commitlog +# segment may be archived, deleted, or recycled once all the data +# in it (potentially from each columnfamily in the system) has been +# flushed to sstables. +# +# The default size is 32, which is almost always fine, but if you are +# archiving commitlog segments (see commitlog_archiving.properties), +# then you probably want a finer granularity of archiving; 8 or 16 MB +# is reasonable. +# Max mutation size is also configurable via max_mutation_size_in_kb setting in +# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. +# This should be positive and less than 2048. +# +# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must +# be set to at least twice the size of max_mutation_size_in_kb / 1024 +# +commitlog_segment_size_in_mb: 32 + +# Compression to apply to the commit log. If omitted, the commit log +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +# commitlog_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Compression to apply to SSTables as they flush for compressed tables. +# Note that tables without compression enabled do not respect this flag. +# +# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially +# block flushes for too long, the default is to flush with a known fast +# compressor in those cases. Options are: +# +# none : Flush without compressing blocks but while still doing checksums. +# fast : Flush with a fast compressor. If the table is already using a +# fast compressor that compressor is used. +# table: Always flush with the same compressor that the table uses. This +# was the pre 4.0 behavior. +# +# flush_compression: fast + +# any class that implements the SeedProvider interface and has a +# constructor that takes a Map of parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "cass1,cass2" + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. Same applies to +# "concurrent_counter_writes", since counter writes read the current +# values before incrementing and writing them back. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 + +# For materialized view writes, as there is a read involved, so this should +# be limited by the less of concurrent reads or concurrent writes. +concurrent_materialized_view_writes: 32 + +# Maximum memory to use for inter-node and client-server networking buffers. +# +# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# networking_cache_size_in_mb: 128 + +# Enable the sstable chunk cache. The chunk cache will store recently accessed +# sections of the sstable in-memory as uncompressed buffers. +# file_cache_enabled: false + +# Maximum memory to use for sstable chunk cache and buffer pooling. +# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache +# that holds uncompressed sstable chunks. +# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# file_cache_size_in_mb: 512 + +# Flag indicating whether to allocate on or off heap when the sstable buffer +# pool is exhausted, that is when it has exceeded the maximum memory +# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + +# buffer_pool_use_heap_if_exhausted: true + +# The strategy for optimizing disk read +# Possible values are: +# ssd (for solid state disks, the default) +# spinning (for spinning disks) +# disk_optimization_strategy: ssd + +# Total permitted memory to use for memtables. Cassandra will stop +# accepting writes when the limit is exceeded until a flush completes, +# and will trigger a flush based on memtable_cleanup_threshold +# If omitted, Cassandra will set both to 1/4 the size of the heap. +# memtable_heap_space_in_mb: 2048 +# memtable_offheap_space_in_mb: 2048 + +# memtable_cleanup_threshold is deprecated. The default calculation +# is the only reasonable choice. See the comments on memtable_flush_writers +# for more information. +# +# Ratio of occupied non-flushing memtable size to total permitted size +# that will trigger a flush of the largest memtable. Larger mct will +# mean larger flushes and hence less compaction, but also less concurrent +# flush activity which can make it difficult to keep your disks fed +# under heavy write load. +# +# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) +# memtable_cleanup_threshold: 0.11 + +# Specify the way Cassandra allocates and manages memtable memory. +# Options are: +# +# heap_buffers +# on heap nio buffers +# +# offheap_buffers +# off heap (direct) nio buffers +# +# offheap_objects +# off heap objects +memtable_allocation_type: heap_buffers + +# Limit memory usage for Merkle tree calculations during repairs. The default +# is 1/16th of the available heap. The main tradeoff is that smaller trees +# have less resolution, which can lead to over-streaming data. If you see heap +# pressure during repairs, consider lowering this, but you cannot go below +# one megabyte. If you see lots of over-streaming, consider raising +# this or using subrange repair. +# +# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. +# +# repair_session_space_in_mb: + +# Total space to use for commit logs on disk. +# +# If space gets above this value, Cassandra will flush every dirty CF +# in the oldest segment and remove it. So a small total commitlog space +# will tend to cause more flush activity on less-active columnfamilies. +# +# The default value is the smaller of 8192, and 1/4 of the total space +# of the commitlog volume. +# +# commitlog_total_space_in_mb: 8192 + +# This sets the number of memtable flush writer threads per disk +# as well as the total number of memtables that can be flushed concurrently. +# These are generally a combination of compute and IO bound. +# +# Memtable flushing is more CPU efficient than memtable ingest and a single thread +# can keep up with the ingest rate of a whole server on a single fast disk +# until it temporarily becomes IO bound under contention typically with compaction. +# At that point you need multiple flush threads. At some point in the future +# it may become CPU bound all the time. +# +# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation +# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing +# to free memory. +# +# memtable_flush_writers defaults to two for a single data directory. +# This means that two memtables can be flushed concurrently to the single data directory. +# If you have multiple data directories the default is one memtable flushing at a time +# but the flush will use a thread per data directory so you will get two or more writers. +# +# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. +# Adding more flush writers will result in smaller more frequent flushes that introduce more +# compaction overhead. +# +# There is a direct tradeoff between number of memtables that can be flushed concurrently +# and flush size and frequency. More is not better you just need enough flush writers +# to never stall waiting for flushing to free memory. +# +#memtable_flush_writers: 2 + +# Total space to use for change-data-capture logs on disk. +# +# If space gets above this value, Cassandra will throw WriteTimeoutException +# on Mutations including tables with CDC enabled. A CDCCompactor is responsible +# for parsing the raw CDC logs and deleting them when parsing is completed. +# +# The default value is the min of 4096 mb and 1/8th of the total space +# of the drive where cdc_raw_directory resides. +# cdc_total_space_in_mb: 4096 + +# When we hit our cdc_raw limit and the CDCCompactor is either running behind +# or experiencing backpressure, we check at the following interval to see if any +# new space for cdc-tracked tables has been made available. Default to 250ms +# cdc_free_space_check_interval_ms: 250 + +# A fixed memory pool size in MB for for SSTable index summaries. If left +# empty, this will default to 5% of the heap size. If the memory usage of +# all index summaries exceeds this limit, SSTables with low read rates will +# shrink their index summaries in order to meet this limit. However, this +# is a best-effort process. In extreme conditions Cassandra may need to use +# more than this amount of memory. +index_summary_capacity_in_mb: + +# How frequently index summaries should be resampled. This is done +# periodically to redistribute memory from the fixed-size pool to sstables +# proportional their recent read rates. Setting to -1 will disable this +# process, leaving existing index summaries at their current sampling level. +index_summary_resize_interval_in_minutes: 60 + +# Whether to, when doing sequential writing, fsync() at intervals in +# order to force the operating system to flush the dirty +# buffers. Enable this to avoid sudden dirty buffer flushing from +# impacting read latencies. Almost always a good idea on SSDs; not +# necessarily on platters. +trickle_fsync: false +trickle_fsync_interval_in_kb: 10240 + +# TCP port, for commands and data +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +storage_port: 7000 + +# SSL port, for legacy encrypted communication. This property is unused unless enabled in +# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated +# as a single port can be used for either/both secure and insecure connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +ssl_storage_port: 7001 + +# Address or interface to bind to and tell other Cassandra nodes to connect to. +# You _must_ change this if you want multiple nodes to be able to communicate! +# +# Set listen_address OR listen_interface, not both. +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing _if_ the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). If unresolvable +# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems. +# +# Setting listen_address to 0.0.0.0 is always wrong. +# +listen_address: 172.20.0.6 + +# Set listen_address OR listen_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# listen_interface: eth0 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# listen_interface_prefer_ipv6: false + +# Address to broadcast to other Cassandra nodes +# Leaving this blank will set it to the same value as listen_address +broadcast_address: 172.20.0.6 + +# When using multiple physical network interfaces, set this +# to true to listen on broadcast_address in addition to +# the listen_address, allowing nodes to communicate in both +# interfaces. +# Ignore this property if the network configuration automatically +# routes between the public and private networks such as EC2. +# listen_on_broadcast_address: false + +# Internode authentication backend, implementing IInternodeAuthenticator; +# used to allow/disallow connections from peer nodes. +# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + +# Whether to start the native transport server. +# The address on which the native transport is bound is defined by rpc_address. +start_native_transport: true +# port for the CQL native transport to listen for clients on +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +native_transport_port: 9042 +# Enabling native transport encryption in client_encryption_options allows you to either use +# encryption for the standard port or to use a dedicated, additional port along with the unencrypted +# standard native_transport_port. +# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption +# for native_transport_port. Setting native_transport_port_ssl to a different value +# from native_transport_port will use encryption for native_transport_port_ssl while +# keeping native_transport_port unencrypted. +# native_transport_port_ssl: 9142 +# The maximum threads for handling requests (note that idle threads are stopped +# after 30 seconds so there is not corresponding minimum setting). +# native_transport_max_threads: 128 +# +# The maximum size of allowed frame. Frame (requests) larger than this will +# be rejected as invalid. The default is 256MB. If you're changing this parameter, +# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. +# native_transport_max_frame_size_in_mb: 256 + +# The maximum number of concurrent client connections. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections: -1 + +# The maximum number of concurrent client connections per source ip. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections_per_ip: -1 + +# Controls whether Cassandra honors older, yet currently supported, protocol versions. +# The default is true, which means all supported protocols will be honored. +native_transport_allow_older_protocols: true + +# Controls when idle client connections are closed. Idle connections are ones that had neither reads +# nor writes for a time period. +# +# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which +# will reset idle timeout timer on the server side. To close idle client connections, corresponding +# values for heartbeat intervals have to be set on the client side. +# +# Idle connection timeouts are disabled by default. +# native_transport_idle_timeout_in_ms: 60000 + +# The address or interface to bind the native transport server to. +# +# Set rpc_address OR rpc_interface, not both. +# +# Leaving rpc_address blank has the same effect as on listen_address +# (i.e. it will be based on the configured hostname of the node). +# +# Note that unlike listen_address, you can specify 0.0.0.0, but you must also +# set broadcast_rpc_address to a value other than 0.0.0.0. +# +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +rpc_address: 0.0.0.0 + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +broadcast_rpc_address: 172.20.0.6 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# See also: +# /proc/sys/net/core/wmem_max +# /proc/sys/net/core/rmem_max +# /proc/sys/net/ipv4/tcp_wmem +# /proc/sys/net/ipv4/tcp_wmem +# and 'man tcp' +# internode_socket_send_buffer_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# internode_socket_receive_buffer_size_in_bytes: + +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# Whether or not a snapshot is taken of the data before keyspace truncation +# or dropping of column families. The STRONGLY advised default of true +# should be used to provide data safety. If you set this flag to false, you will +# lose data on truncation or drop. +auto_snapshot: true + +# The act of creating or clearing a snapshot involves creating or removing +# potentially tens of thousands of links, which can cause significant performance +# impact, especially on consumer grade SSDs. A non-zero value here can +# be used to throttle these links to avoid negative performance impact of +# taking and clearing snapshots +snapshot_links_per_second: 0 + +# Granularity of the collation index of rows within a partition. +# Increase if your rows are large, or if you have a very large +# number of rows per partition. The competing goals are these: +# +# - a smaller granularity means more index entries are generated +# and looking up rows withing the partition by collation column +# is faster +# - but, Cassandra will keep the collation index in memory for hot +# rows (as part of the key cache), so a larger granularity means +# you can cache more hot rows +column_index_size_in_kb: 64 + +# Per sstable indexed key cache entries (the collation index in memory +# mentioned above) exceeding this size will not be held on heap. +# This means that only partition information is held on heap and the +# index entries are read from disk. +# +# Note that this size refers to the size of the +# serialized index information and not the size of the partition. +column_index_cache_size_in_kb: 2 + +# Number of simultaneous compactions to allow, NOT including +# validation "compactions" for anti-entropy repair. Simultaneous +# compactions can help preserve read performance in a mixed read/write +# workload, by mitigating the tendency of small sstables to accumulate +# during a single long running compactions. The default is usually +# fine and if you experience problems with compaction running too +# slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# +# concurrent_compactors defaults to the smaller of (number of disks, +# number of cores), with a minimum of 2 and a maximum of 8. +# +# If your data directories are backed by SSD, you should increase this +# to the number of cores. +#concurrent_compactors: 1 + +# Number of simultaneous repair validations to allow. If not set or set to +# a value less than 1, it defaults to the value of concurrent_compactors. +# To set a value greeater than concurrent_compactors at startup, the system +# property cassandra.allow_unlimited_concurrent_validations must be set to +# true. To dynamically resize to a value > concurrent_compactors on a running +# node, first call the bypassConcurrentValidatorsLimit method on the +# org.apache.cassandra.db:type=StorageService mbean +# concurrent_validations: 0 + +# Number of simultaneous materialized view builder tasks to allow. +concurrent_materialized_view_builders: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. Note that this accounts for all types +# of compaction, including validation compaction (building Merkle trees +# for repairs). +compaction_throughput_mb_per_sec: 64 + +# When compacting, the replacement sstable(s) can be opened before they +# are completely written, and used in place of the prior sstables for +# any range that has been written. This helps to smoothly transfer reads +# between the sstables, reducing page cache churn and keeping hot rows hot +sstable_preemptive_open_interval_in_mb: 50 + +# When enabled, permits Cassandra to zero-copy stream entire eligible +# SSTables between nodes, including every component. +# This speeds up the network transfer significantly subject to +# throttling specified by stream_throughput_outbound_megabits_per_sec. +# Enabling this will reduce the GC pressure on sending and receiving node. +# When unset, the default is enabled. While this feature tries to keep the +# disks balanced, it cannot guarantee it. This feature will be automatically +# disabled if internode encryption is enabled. +# stream_entire_sstables: true + +# Throttles all outbound streaming file transfers on this node to the +# given total throughput in Mbps. This is necessary because Cassandra does +# mostly sequential IO when streaming data during bootstrap or repair, which +# can lead to saturating the network connection and degrading rpc performance. +# When unset, the default is 200 Mbps or 25 MB/s. +# stream_throughput_outbound_megabits_per_sec: 200 + +# Throttles all streaming file transfer between the datacenters, +# this setting allows users to throttle inter dc stream throughput in addition +# to throttling all network stream traffic as configured with +# stream_throughput_outbound_megabits_per_sec +# When unset, the default is 200 Mbps or 25 MB/s +# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + +# Server side timeouts for requests. The server will return a timeout exception +# to the client if it can't complete an operation within the corresponding +# timeout. Those settings are a protection against: +# 1) having client wait on an operation that might never terminate due to some +# failures. +# 2) operations that use too much CPU/read too much data (leading to memory build +# up) by putting a limit to how long an operation will execute. +# For this reason, you should avoid putting these settings too high. In other words, +# if you are timing out requests because of underlying resource constraints then +# increasing the timeout will just cause more problems. Of course putting them too +# low is equally ill-advised since clients could get timeouts even for successful +# operations just because the timeout setting is too tight. + +# How long the coordinator should wait for read operations to complete. +# Lowest acceptable value is 10 ms. +read_request_timeout_in_ms: 5000 +# How long the coordinator should wait for seq or index scans to complete. +# Lowest acceptable value is 10 ms. +range_request_timeout_in_ms: 10000 +# How long the coordinator should wait for writes to complete. +# Lowest acceptable value is 10 ms. +write_request_timeout_in_ms: 2000 +# How long the coordinator should wait for counter writes to complete. +# Lowest acceptable value is 10 ms. +counter_write_request_timeout_in_ms: 5000 +# How long a coordinator should continue to retry a CAS operation +# that contends with other proposals for the same row. +# Lowest acceptable value is 10 ms. +cas_contention_timeout_in_ms: 1000 +# How long the coordinator should wait for truncates to complete +# (This can be much longer, because unless auto_snapshot is disabled +# we need to flush first so we can snapshot before removing the data.) +# Lowest acceptable value is 10 ms. +truncate_request_timeout_in_ms: 60000 +# The default timeout for other, miscellaneous operations. +# Lowest acceptable value is 10 ms. +request_timeout_in_ms: 10000 + +# Defensive settings for protecting Cassandra from true network partitions. +# See (CASSANDRA-14358) for details. +# +# The amount of time to wait for internode tcp connections to establish. +# internode_tcp_connect_timeout_in_ms: 2000 +# +# The amount of time unacknowledged data is allowed on a connection before we throw out the connection +# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000 +# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0 +# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8. +# internode_tcp_user_timeout_in_ms: 30000 + +# The amount of time unacknowledged data is allowed on a streaming connection. +# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout. +# internode_streaming_tcp_user_timeout_in_ms: 300000 + +# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes +# and waiting to be processed on arrival from other nodes in the cluster. These limits are applied to the on-wire +# size of the message being sent or received. +# +# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed. +# Each node-pair has three links: urgent, small and large. So any given node may have a maximum of +# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes) +# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens +# nodes should need to communicate with significant bandwidth. +# +# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit, +# on all links to or from a single node in the cluster. +# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit, +# on all links to or from any node in the cluster. +# +# internode_application_send_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB +# internode_application_receive_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB + + +# How long before a node logs slow queries. Select queries that take longer than +# this timeout to execute, will generate an aggregated log message, so that slow queries +# can be identified. Set this value to zero to disable slow query logging. +slow_query_log_timeout_in_ms: 500 + +# Enable operation timeout information exchange between nodes to accurately +# measure request timeouts. If disabled, replicas will assume that requests +# were forwarded to them instantly by the coordinator, which means that +# under overload conditions we will waste that much extra time processing +# already-timed-out requests. +# +# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync, +# since this is a requirement for general correctness of last write wins. +#cross_node_timeout: true + +# Set keep-alive period for streaming +# This node will send a keep-alive message periodically with this period. +# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fail +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# Limit number of connections per host for streaming +# Increase this when you notice that joins are CPU-bound rather that network +# bound (for example a few nodes with big files). +# streaming_connections_per_host: 1 + + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS). From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. +endpoint_snitch: GossipingPropertyFileSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 1.0 + +# Configure server-to-server internode encryption +# +# JVM and netty defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable server-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set internode_encryption= and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +server_encryption_options: + # On outbound connections, determine which type of peers to securely connect to. + # The available options are : + # none : Do not encrypt outgoing connections + # dc : Encrypt connections to peers in other datacenters but not within datacenters + # rack : Encrypt connections to peers in other racks but not within racks + # all : Always use encrypted connections + internode_encryption: none + # When set to true, encrypted and unencrypted connections are allowed on the storage_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true if internode_encryption is none + # optional: true + # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used + # during upgrade to 4.0; otherwise, set to false. + enable_legacy_ssl_storage_port: false + # Set to a valid keystore if internode_encryption is dc, rack or all + keystore: conf/.keystore + keystore_password: cassandra + # Verify peer server certificates + require_client_auth: false + # Set to a valid trustore if require_client_auth is true + truststore: conf/.truststore + truststore_password: cassandra + # Verify that the host name in the certificate matches the connected host + require_endpoint_verification: false + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# Configure client-to-server encryption. +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable client-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +client_encryption_options: + # Enable client-to-server encryption + enabled: false + # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true when enabled is false, and false when enabled is true. + # optional: true + # Set keystore and keystore_password to valid keystores if enabled is true + keystore: conf/.keystore + keystore_password: cassandra + # Verify client certificates + require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. +windows_timer_interval: 1 + + +# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from +# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by +# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys +# can still (and should!) be in the keystore and will be used on decrypt operations +# (to handle the case of key rotation). +# +# It is strongly recommended to download and install Java Cryptography Extension (JCE) +# Unlimited Strength Jurisdiction Policy Files for your version of the JDK. +# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) +# +# Currently, only the following file types are supported for transparent data encryption, although +# more are coming in future cassandra releases: commitlog, hints +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + +##################### +# SAFETY THRESHOLDS # +##################### + +# When executing a scan, within or across a partition, we need to keep the +# tombstones seen in memory so we can return them to the coordinator, which +# will use them to make sure other replicas also know about the deleted rows. +# With workloads that generate a lot of tombstones, this can cause performance +# problems and even exaust the server heap. +# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) +# Adjust the thresholds here if you understand the dangers and want to +# scan more tombstones anyway. These thresholds may also be adjusted at runtime +# using the StorageService mbean. +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a +# mechanism called replica filtering protection to ensure that results from stale replicas do +# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This +# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly +# stale results returned by the replicas, the more rows materialized during the query. +replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. +# Caution should be taken on increasing the size of this threshold as it can lead to node instability. +batch_size_warn_threshold_in_kb: 5 + +# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. +batch_size_fail_threshold_in_kb: 50 + +# Log WARN on any batches not of type LOGGED than span across more partitions than this limit +unlogged_batch_across_partitions_warn_threshold: 10 + +# Log a warning when compacting partitions larger than this value +compaction_large_partition_warning_threshold_mb: 100 + +# GC Pauses greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# gc_warn_threshold_in_ms: 1000 + +# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption +# early. Any value size larger than this threshold will result into marking an SSTable +# as corrupted. This should be positive and less than 2048. +# max_value_size_in_mb: 256 + +# Track a metric per keyspace indicating whether replication achieved the ideal consistency +# level for writes without timing out. This is different from the consistency level requested by +# each write which may be lower in order to facilitate availability. +# ideal_consistency_level: EACH_QUORUM + +# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the +# oldest non-upgraded sstable will get upgraded to the latest version +# automatic_sstable_upgrade: false +# Limit the number of concurrent sstable upgrades +# max_concurrent_automatic_sstable_upgrades: 1 + +# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs +# on audit_logging for full details about the various configuration options. +audit_logging_options: + enabled: false + logger: + - class_name: BinAuditLogger + # audit_logs_dir: + # included_keyspaces: + # excluded_keyspaces: system, system_schema, system_virtual_schema + # included_categories: + # excluded_categories: + # included_users: + # excluded_users: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + + +# default options for full query logging - these can be overridden from command line when executing +# nodetool enablefullquerylog +#full_query_logging_options: + # log_dir: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + +# validate tombstones on reads and compaction +# can be either "disabled", "warn" or "exception" +# corrupted_tombstone_strategy: disabled + +# Diagnostic Events # +# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details +# on internal state and temporal relationships across events, accessible by clients via JMX. +diagnostic_events_enabled: false + +# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in +# particular you run an old kernel or have very fewer client connections, this option might be worth evaluating. +#native_transport_flush_in_batches_legacy: false + +# Enable tracking of repaired state of data during reads and comparison between replicas +# Mismatches between the repaired sets of replicas can be characterized as either confirmed +# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair +# sessions, unrepaired partition tombstones, or some other condition means that the disparity +# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation +# as they may be indicative of corruption or data loss. +# There are separate flags for range vs partition reads as single partition reads are only tracked +# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if +# enabled for range reads, all range reads will include repaired data tracking. As this adds +# some overhead, operators may wish to disable it whilst still enabling it for partition reads +repaired_data_tracking_for_range_reads_enabled: false +repaired_data_tracking_for_partition_reads_enabled: false +# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed +# mismatches will also be recorded. This is to avoid potential signal:noise issues are unconfirmed +# mismatches are less actionable than confirmed ones. +report_unconfirmed_repaired_data_mismatches: false + +# Having many tables and/or keyspaces negatively affects performance of many operations in the +# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds +# a client warning will be sent back to the user when creating a table or keyspace. +# table_count_warn_threshold: 150 +# keyspace_count_warn_threshold: 40 + +######################### +# EXPERIMENTAL FEATURES # +######################### + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# Enables SASI index creation on this node. +# SASI indexes are considered experimental and are not recommended for production use. +enable_sasi_indexes: false + +# Enables creation of transiently replicated keyspaces on this node. +# Transient replication is experimental and is not recommended for production use. +enable_transient_replication: false + +# Enables the used of 'ALTER ... DROP COMPACT STORAGE' statements on this node. +# 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use. +enable_drop_compact_storage: false diff --git a/etc/_copy_cass1/commitlog_archiving.properties b/etc/_copy_cass1/commitlog_archiving.properties new file mode 100644 index 0000000..393259c --- /dev/null +++ b/etc/_copy_cass1/commitlog_archiving.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# commitlog archiving configuration. Leave blank to disable. + +# Command to execute to archive a commitlog segment +# Parameters: %path => Fully qualified path of the segment to archive +# %name => Name of the commit log. +# Example: archive_command=/bin/ln %path /backup/%name +# +# Limitation: *_command= expects one command with arguments. STDOUT +# and STDIN or multiple commands cannot be executed. You might want +# to script multiple commands and add a pointer here. +archive_command= + +# Command to execute to make an archived commitlog live again. +# Parameters: %from is the full path to an archived commitlog segment (from restore_directories) +# %to is the live commitlog directory +# Example: restore_command=/bin/cp -f %from %to +restore_command= + +# Directory to scan the recovery files in. +restore_directories= + +# Restore mutations created up to and including this timestamp in GMT. +# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12) +# +# Recovery will continue through the segment when the first client-supplied +# timestamp greater than this time is encountered, but only mutations less than +# or equal to this timestamp will be applied. +restore_point_in_time= + +# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...) +precision=MICROSECONDS diff --git a/etc/_copy_cass1/cqlshrc.sample b/etc/_copy_cass1/cqlshrc.sample new file mode 100644 index 0000000..2c00d4a --- /dev/null +++ b/etc/_copy_cass1/cqlshrc.sample @@ -0,0 +1,238 @@ +; Licensed to the Apache Software Foundation (ASF) under one +; or more contributor license agreements. See the NOTICE file +; distributed with this work for additional information +; regarding copyright ownership. The ASF licenses this file +; to you under the Apache License, Version 2.0 (the +; "License"); you may not use this file except in compliance +; with the License. You may obtain a copy of the License at +; +; http://www.apache.org/licenses/LICENSE-2.0 +; +; Unless required by applicable law or agreed to in writing, +; software distributed under the License is distributed on an +; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +; KIND, either express or implied. See the License for the +; specific language governing permissions and limitations +; under the License. +; +; Sample ~/.cqlshrc file. + +[authentication] +;; If Cassandra has auth enabled, fill out these options +; username = fred +; password = !!bang!!$ +; keyspace = ks1 + + + +[ui] +;; Whether or not to display query results with colors +; color = on + +;; Used for displaying timestamps (and reading them with COPY) +; time_format = %Y-%m-%d %H:%M:%S%z + +;; Display timezone +;timezone = Etc/UTC + +;; The number of digits displayed after the decimal point for single and double precision numbers +;; (note that increasing this to large numbers can result in unusual values) +;float_precision = 5 +;double_precision = 12 + +;; Used for automatic completion and suggestions +; completekey = tab + +;; The encoding used for characters +; encoding = utf8 + +; To use another than the system default browser for cqlsh HELP to open +; the CQL doc HTML, use the 'browser' preference. +; If the field value is empty or not specified, cqlsh will use the +; default browser (specifying 'browser = default' does not work). +; +; Supported browsers are those supported by the Python webbrowser module. +; (https://docs.python.org/3/library/webbrowser.html). +; +; Hint: to use Google Chome, use +; 'browser = open -a /Applications/Google\ Chrome.app %s' on Mac OS X and +; 'browser = /usr/bin/google-chrome-stable %s' on Linux and +; 'browser = C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s' on Windows. +; +; This setting can be overridden with the --browser command line option. +; +;browser = + +[cql] +;; A version of CQL to use (this should almost never be set) +; version = 3.2.1 + + + +[connection] + +;; The host to connect to +hostname = 127.0.0.1 + +;; The port to connect to (9042 is the native protocol default) +port = 9042 + +;; Always connect using SSL - false by default +; ssl = true + +;; A timeout in seconds for opening new connections +; timeout = 10 + +;; A timeout in seconds for executing queries +; request_timeout = 10 + + + +[csv] +;; The size limit for parsed fields +; field_size_limit = 131072 + + + +[tracing] +;; The max number of seconds to wait for a trace to complete +; max_trace_wait = 10.0 + + + +;[ssl] +; certfile = ~/keys/cassandra.cert + +;; Optional - true by default. +;validate = true + +;; To be provided when require_client_auth=true +;userkey = ~/key.pem + +;; To be provided when require_client_auth=true +;usercert = ~/cert.pem + + + +;; Optional section, overrides default certfile in [ssl] section, if present +; [certfiles] +; 192.168.1.3 = ~/keys/cassandra01.cert +; 192.168.1.4 = ~/keys/cassandra02.cert + + + +;; Options that are common to both COPY TO and COPY FROM +; [copy] + +;; The string placeholder for null values +; nullval = null + +;; For COPY TO, controls whether the first line in the CSV output file will +;; contain the column names. For COPY FROM, specifies whether the first +;; line in the CSV file contains column names. +; header = false + +;; The character that is used as the decimal point separator +; decimalsep = . + +;; The character that is used to separate thousands +;; (defaults to the empty string) +; thousandssep = + +;; The string literal format for boolean values +; boolstyle = True,False + +;; The number of child worker processes to create for +;; COPY tasks. Defaults to a max of 4 for COPY FROM and 16 +;; for COPY TO. However, at most (num_cores - 1) processes +;; will be created. +; numprocesses = + +;; The maximum number of failed attempts to fetch a range of data (when using +;; COPY TO) or insert a chunk of data (when using COPY FROM) before giving up +; maxattempts = 5 + +;; How often status updates are refreshed, in seconds +; reportfrequency = 0.25 + +;; An optional file to output rate statistics to +; ratefile = + + + +;; Options specific to COPY TO +; [copy-to] + +;; The maximum number token ranges to fetch simultaneously +; maxrequests = 6 + +;; The number of rows to fetch in a single page +; pagesize = 1000 + +;; By default the page timeout is 10 seconds per 1000 entries +;; in the page size or 10 seconds if pagesize is smaller +; pagetimeout = 10 + +;; Token range to export. Defaults to exporting the full ring. +; begintoken = +; endtoken = + +; The maximum size of the output file measured in number of lines; +; beyond this maximum the output file will be split into segments. +; -1 means unlimited. +; maxoutputsize = -1 + +;; The encoding used for characters +; encoding = utf8 + + + +;; Options specific to COPY FROM +; [copy-from] + +;; The maximum number of rows to process per second +; ingestrate = 100000 + +;; The maximum number of rows to import (-1 means unlimited) +; maxrows = -1 + +;; A number of initial rows to skip +; skiprows = 0 + +;; A comma-separated list of column names to ignore +; skipcols = + +;; The maximum global number of parsing errors to ignore, -1 means unlimited +; maxparseerrors = -1 + +;; The maximum global number of insert errors to ignore, -1 means unlimited +; maxinserterrors = 1000 + +;; A file to store all rows that could not be imported, by default this is +;; import__.err where is your keyspace and
is your table name. +; errfile = + +;; The min and max number of rows inserted in a single batch +; maxbatchsize = 20 +; minbatchsize = 2 + +;; The number of rows that are passed to child worker processes from +;; the main process at a time +; chunksize = 1000 + + + +;; The options for COPY can also be specified per-table. The following +;; three sections demonstrate this. + +;; Optional table-specific options for COPY +; [copy:mykeyspace.mytable] +; chunksize = 1000 + +;; Optional table-specific options for COPY FROM +; [copy-from:mykeyspace.mytable] +; ingestrate = 20000 + +;; Optional table-specific options for COPY TO +; [copy-to:mykeyspace.mytable] +; pagetimeout = 30 diff --git a/etc/_copy_cass1/hotspot_compiler b/etc/_copy_cass1/hotspot_compiler new file mode 100644 index 0000000..ebed7ce --- /dev/null +++ b/etc/_copy_cass1/hotspot_compiler @@ -0,0 +1,32 @@ +dontinline org.apache.cassandra.db.Columns$Serializer::deserializeLargeSubset (Lorg.apache.cassandra.io.util.DataInputPlus;Lorg.apache.cassandra.db.Columns;I)Lorg.apache.cassandra.db.Columns; +dontinline org.apache.cassandra.db.Columns$Serializer::serializeLargeSubset (Ljava.util.Collection;ILorg.apache.cassandra.db.Columns;ILorg.apache.cassandra.io.util.DataOutputPlus;)V +dontinline org.apache.cassandra.db.Columns$Serializer::serializeLargeSubsetSize (Ljava.util.Collection;ILorg.apache.cassandra.db.Columns;I)I +dontinline org.apache.cassandra.db.commitlog.AbstractCommitLogSegmentManager::advanceAllocatingFrom (Lorg.apache.cassandra.db.commitlog.CommitLogSegment;)V +dontinline org.apache.cassandra.db.transform.BaseIterator::tryGetMoreContents ()Z +dontinline org.apache.cassandra.db.transform.StoppingTransformation::stop ()V +dontinline org.apache.cassandra.db.transform.StoppingTransformation::stopInPartition ()V +dontinline org.apache.cassandra.io.util.BufferedDataOutputStreamPlus::doFlush (I)V +dontinline org.apache.cassandra.io.util.BufferedDataOutputStreamPlus::writeSlow (JI)V +dontinline org.apache.cassandra.io.util.RebufferingInputStream::readPrimitiveSlowly (I)J +exclude org.apache.cassandra.utils.JVMStabilityInspector::forceHeapSpaceOomMaybe (Ljava.lang.OutOfMemoryError;)V +inline org.apache.cassandra.db.rows.UnfilteredSerializer::serializeRowBody (Lorg.apache.cassandra.db.rows.Row;ILorg.apache.cassandra.db.rows.SerializationHelper;Lorg.apache.cassandra.io.util.DataOutputPlus;)V +inline org.apache.cassandra.io.util.Memory::checkBounds (JJ)V +inline org.apache.cassandra.io.util.SafeMemory::checkBounds (JJ)V +inline org.apache.cassandra.net.FrameDecoderWith8bHeader::decode (Ljava.util.Collection;Lorg.apache.cassandra.net.ShareableBytes;I)V +inline org.apache.cassandra.service.reads.repair.RowIteratorMergeListener::applyToPartition (ILjava.util.function.Consumer;)V +inline org.apache.cassandra.utils.AsymmetricOrdering::selectBoundary (Lorg.apache.cassandra.utils.AsymmetricOrdering.Op;II)I +inline org.apache.cassandra.utils.AsymmetricOrdering::strictnessOfLessThan (Lorg.apache.cassandra.utils.AsymmetricOrdering.Op;)I +inline org.apache.cassandra.utils.BloomFilter::indexes (Lorg.apache.cassandra.utils.IFilter.FilterKey;)[J +inline org.apache.cassandra.utils.BloomFilter::setIndexes (JJIJ[J)V +inline org.apache.cassandra.utils.ByteBufferUtil::compare (Ljava.nio.ByteBuffer;[B)I +inline org.apache.cassandra.utils.ByteBufferUtil::compare ([BLjava.nio.ByteBuffer;)I +inline org.apache.cassandra.utils.ByteBufferUtil::compareUnsigned (Ljava.nio.ByteBuffer;Ljava.nio.ByteBuffer;)I +inline org.apache.cassandra.utils.FastByteOperations$UnsafeOperations::compareTo (Ljava.lang.Object;JILjava.lang.Object;JI)I +inline org.apache.cassandra.utils.FastByteOperations$UnsafeOperations::compareTo (Ljava.lang.Object;JILjava.nio.ByteBuffer;)I +inline org.apache.cassandra.utils.FastByteOperations$UnsafeOperations::compareTo (Ljava.nio.ByteBuffer;Ljava.nio.ByteBuffer;)I +inline org.apache.cassandra.utils.memory.BufferPool$LocalPool::tryGetInternal (IZ)Ljava.nio.ByteBuffer; +inline org.apache.cassandra.utils.vint.VIntCoding::encodeUnsignedVInt (JI)[B +inline org.apache.cassandra.utils.vint.VIntCoding::encodeUnsignedVInt (JI[B)V +inline org.apache.cassandra.utils.vint.VIntCoding::writeUnsignedVInt (JLjava.io.DataOutput;)V +inline org.apache.cassandra.utils.vint.VIntCoding::writeUnsignedVInt (JLjava.nio.ByteBuffer;)V +inline org.apache.cassandra.utils.vint.VIntCoding::writeVInt (JLjava.io.DataOutput;)V diff --git a/etc/_copy_cass1/jvm-clients.options b/etc/_copy_cass1/jvm-clients.options new file mode 100644 index 0000000..6181ed0 --- /dev/null +++ b/etc/_copy_cass1/jvm-clients.options @@ -0,0 +1,10 @@ +########################################################################### +# jvm-clients.options # +# # +# See jvm8-clients.options and jvm11-clients.options for Java version # +# specific options. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/_copy_cass1/jvm-server.options b/etc/_copy_cass1/jvm-server.options new file mode 100644 index 0000000..46967f4 --- /dev/null +++ b/etc/_copy_cass1/jvm-server.options @@ -0,0 +1,188 @@ +########################################################################### +# jvm-server.options # +# # +# - all flags defined here will be used by cassandra to startup the JVM # +# - one flag should be specified per line # +# - lines that do not start with '-' will be ignored # +# - only static flags are accepted (no variables or parameters) # +# - dynamic flags will be appended to these on cassandra-env # +# # +# See jvm8-server.options and jvm11-server.options for Java version # +# specific options. # +########################################################################### + +###################### +# STARTUP PARAMETERS # +###################### + +# Uncomment any of the following properties to enable specific startup parameters + +# In a multi-instance deployment, multiple Cassandra instances will independently assume that all +# CPU processors are available to it. This setting allows you to specify a smaller set of processors +# and perhaps have affinity. +#-Dcassandra.available_processors=number_of_processors + +# The directory location of the cassandra.yaml file. +#-Dcassandra.config=directory + +# Sets the initial partitioner token for a node the first time the node is started. +#-Dcassandra.initial_token=token + +# Set to false to start Cassandra on a node but not have the node join the cluster. +#-Dcassandra.join_ring=true|false + +# Set to false to clear all gossip state for the node on restart. Use when you have changed node +# information in cassandra.yaml (such as listen_address). +#-Dcassandra.load_ring_state=true|false + +# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. +#-Dcassandra.metricsReporterConfigFile=file + +# Set the port on which the CQL native transport listens for clients. (Default: 9042) +#-Dcassandra.native_transport_port=port + +# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) +#-Dcassandra.partitioner=partitioner + +# To replace a node that has died, restart a new node in its place specifying the address of the +# dead node. The new node must not have any data in its data directory, that is, it must be in the +# same state as before bootstrapping. +#-Dcassandra.replace_address=listen_address or broadcast_address of dead node + +# Allow restoring specific tables from an archived commit log. +#-Dcassandra.replayList=table + +# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits +# before joining the ring. +#-Dcassandra.ring_delay_ms=ms + +# Set the SSL port for encrypted communication. (Default: 7001) +#-Dcassandra.ssl_storage_port=port + +# Set the port for inter-node communication. (Default: 7000) +#-Dcassandra.storage_port=port + +# Set the default location for the trigger JARs. (Default: conf/triggers) +#-Dcassandra.triggers_dir=directory + +# For testing new compaction and compression strategies. It allows you to experiment with different +# strategies and benchmark write performance differences without affecting the production workload. +#-Dcassandra.write_survey=true + +# To disable configuration via JMX of auth caches (such as those for credentials, permissions and +# roles). This will mean those config options can only be set (persistently) in cassandra.yaml +# and will require a restart for new values to take effect. +#-Dcassandra.disable_auth_caches_remote_configuration=true + +# To disable dynamic calculation of the page size used when indexing an entire partition (during +# initial index build/rebuild). If set to true, the page size will be fixed to the default of +# 10000 rows per page. +#-Dcassandra.force_default_indexing_page_size=true + +# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds +#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds + +######################## +# GENERAL JVM SETTINGS # +######################## + +# enable assertions. highly suggested for correct application functionality. +-ea + +# disable assertions for net.openhft.** because it runs out of memory by design +# if enabled and run for more than just brief testing +-da:net.openhft... + +# enable thread priorities, primarily so we can give periodic tasks +# a lower priority to avoid interfering with client workload +-XX:+UseThreadPriorities + +# Enable heap-dump if there's an OOM +-XX:+HeapDumpOnOutOfMemoryError + +# Per-thread stack size. +-Xss256k + +# Make sure all memory is faulted and zeroed on startup. +# This helps prevent soft faults in containers and makes +# transparent hugepage allocation more effective. +-XX:+AlwaysPreTouch + +# Disable biased locking as it does not benefit Cassandra. +-XX:-UseBiasedLocking + +# Enable thread-local allocation blocks and allow the JVM to automatically +# resize them at runtime. +-XX:+UseTLAB +-XX:+ResizeTLAB +-XX:+UseNUMA + +# http://www.evanjones.ca/jvm-mmap-pause.html +-XX:+PerfDisableSharedMem + +# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See +# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: +# comment out this entry to enable IPv6 support). +-Djava.net.preferIPv4Stack=true + +### Debug options + +# uncomment to enable flight recorder +#-XX:+UnlockCommercialFeatures +#-XX:+FlightRecorder + +# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 +#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 + +# uncomment to have Cassandra JVM log internal method compilation (developers only) +#-XX:+UnlockDiagnosticVMOptions +#-XX:+LogCompilation + +################# +# HEAP SETTINGS # +################# + +# Heap size is automatically calculated by cassandra-env based on this +# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) +# That is: +# - calculate 1/2 ram and cap to 1024MB +# - calculate 1/4 ram and cap to 8192MB +# - pick the max +# +# For production use you may wish to adjust this for your environment. +# If that's the case, uncomment the -Xmx and Xms options below to override the +# automatic calculation of JVM heap memory. +# +# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to +# the same value to avoid stop-the-world GC pauses during resize, and +# so that we can lock the heap in memory on startup to prevent any +# of it from being swapped out. +#-Xms4G +#-Xmx4G + +# Young generation size is automatically calculated by cassandra-env +# based on this formula: min(100 * num_cores, 1/4 * heap size) +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# It is not recommended to set the young generation size if using the +# G1 GC, since that will override the target pause-time goal. +# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html +# +# The example below assumes a modern 8-core+ machine for decent +# times. If in doubt, and if you do not particularly want to tweak, go +# 100 MB per physical CPU core. +#-Xmn800M + +################################### +# EXPIRATION DATE OVERFLOW POLICY # +################################### + +# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date: +# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00. +# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. +# * CAP_NOWARN: same as previous, except that the client warning will not be emitted. +# +#-Dcassandra.expiration_date_overflow_policy=REJECT diff --git a/etc/_copy_cass1/jvm11-clients.options b/etc/_copy_cass1/jvm11-clients.options new file mode 100644 index 0000000..c88b7ab --- /dev/null +++ b/etc/_copy_cass1/jvm11-clients.options @@ -0,0 +1,29 @@ +########################################################################### +# jvm11-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 11 and newer. # +########################################################################### + +################### +# JPMS SETTINGS # +################### + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + +# The newline in the end of file is intentional diff --git a/etc/_copy_cass1/jvm11-server.options b/etc/_copy_cass1/jvm11-server.options new file mode 100644 index 0000000..7e78467 --- /dev/null +++ b/etc/_copy_cass1/jvm11-server.options @@ -0,0 +1,103 @@ +########################################################################### +# jvm11-server.options # +# # +# See jvm-server.options. This file is specific for Java 11 and newer. # +########################################################################### + +################# +# GC SETTINGS # +################# + + + +### CMS Settings +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + + + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + + +### JPMS + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + + +### GC logging options -- uncomment to enable + +# Java 11 (and newer) GC logging options: +# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax +# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M +#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 + +# Notes for Java 8 migration: +# +# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc" +# -XX:+PrintGCDateStamps maps to decorator 'time' +# +# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' +# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' +# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' +# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' +# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' + +### Netty Options + +# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable +# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has +# inferior performance and risks exceeding MaxDirectMemory +-Dio.netty.tryReflectionSetAccessible=true + +# The newline in the end of file is intentional diff --git a/etc/_copy_cass1/jvm8-clients.options b/etc/_copy_cass1/jvm8-clients.options new file mode 100644 index 0000000..7d1b2ef --- /dev/null +++ b/etc/_copy_cass1/jvm8-clients.options @@ -0,0 +1,9 @@ +########################################################################### +# jvm8-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 8 and newer. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/_copy_cass1/jvm8-server.options b/etc/_copy_cass1/jvm8-server.options new file mode 100644 index 0000000..6214669 --- /dev/null +++ b/etc/_copy_cass1/jvm8-server.options @@ -0,0 +1,76 @@ +########################################################################### +# jvm8-server.options # +# # +# See jvm-server.options. This file is specific for Java 8 and newer. # +########################################################################### + +######################## +# GENERAL JVM SETTINGS # +######################## + +# allows lowering thread priority without being root on linux - probably +# not necessary on Windows but doesn't harm anything. +# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html +-XX:ThreadPriorityPolicy=42 + +################# +# GC SETTINGS # +################# + +### CMS Settings +-XX:+UseParNewGC +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + +### GC logging options -- uncomment to enable + +-XX:+PrintGCDetails +-XX:+PrintGCDateStamps +-XX:+PrintHeapAtGC +-XX:+PrintTenuringDistribution +-XX:+PrintGCApplicationStoppedTime +-XX:+PrintPromotionFailure +#-XX:PrintFLSStatistics=1 +#-Xloggc:/var/log/cassandra/gc.log +-XX:+UseGCLogFileRotation +-XX:NumberOfGCLogFiles=10 +-XX:GCLogFileSize=10M + +# The newline in the end of file is intentional diff --git a/etc/_copy_cass1/logback-tools.xml b/etc/_copy_cass1/logback-tools.xml new file mode 100644 index 0000000..12b59fb --- /dev/null +++ b/etc/_copy_cass1/logback-tools.xml @@ -0,0 +1,34 @@ + + + + + System.err + + %-5level %date{"HH:mm:ss,SSS"} %msg%n + + + WARN + + + + + + + diff --git a/etc/_copy_cass1/logback.xml b/etc/_copy_cass1/logback.xml new file mode 100644 index 0000000..e98fea4 --- /dev/null +++ b/etc/_copy_cass1/logback.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + INFO + + ${cassandra.logdir}/system.log + + + ${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + ${cassandra.logdir}/debug.log + + + ${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + 1024 + 0 + true + + + + + + + + INFO + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/_copy_cass1/metrics-reporter-config-sample.yaml b/etc/_copy_cass1/metrics-reporter-config-sample.yaml new file mode 100644 index 0000000..54f2657 --- /dev/null +++ b/etc/_copy_cass1/metrics-reporter-config-sample.yaml @@ -0,0 +1,41 @@ +# For details see: +# * http://wiki.apache.org/cassandra/Metrics +# * https://github.com/addthis/metrics-reporter-config + +# This is an example file for configuring which metrics should go +# where. The sample sends everything to a flat file for humans to +# poke at. metrics-ganglia or metrics-graphite are more likely to +# operationally useful. + +# Some metrics are global for a node (KeyCache capacity) while others +# are broken down by column family or even IP. The sample list +# includes all of the global metrics via a while list. To include +# metrics for the system column family for example add +# "^org.apache.cassandra.metrics.ColumnFamily.system.+". + + +# Start Cassandra with +# -Dcassandra.metricsReporterConfigFile=metrics-reporter-config.yaml +# for this file to be used. If you are using metrics-ganglia, +# metrics-graphite, or a custom reporter you will also have to add those +# jars to the lib directory. Nothing in this file can affect +# jmx metrics. + + +console: + - + outfile: '/tmp/metrics.out' + period: 10 + timeunit: 'SECONDS' + predicate: + color: "white" + useQualifiedName: true + patterns: + - "^org.apache.cassandra.metrics.Cache.+" + - "^org.apache.cassandra.metrics.ClientRequest.+" # includes ClientRequestMetrics + - "^org.apache.cassandra.metrics.CommitLog.+" + - "^org.apache.cassandra.metrics.Compaction.+" + - "^org.apache.cassandra.metrics.DroppedMessage.+" + - "^org.apache.cassandra.metrics.ReadRepair.+" + - "^org.apache.cassandra.metrics.Storage.+" + - "^org.apache.cassandra.metrics.ThreadPools.+" diff --git a/etc/_copy_cass1/triggers/README.txt b/etc/_copy_cass1/triggers/README.txt new file mode 100644 index 0000000..d2f6641 --- /dev/null +++ b/etc/_copy_cass1/triggers/README.txt @@ -0,0 +1 @@ +Place triggers to be loaded in this directory, as jar files. diff --git a/etc/cass1/.gitignore b/etc/cass1/.gitignore new file mode 100644 index 0000000..e81e4e0 --- /dev/null +++ b/etc/cass1/.gitignore @@ -0,0 +1,11 @@ +# ignore everything +#* + +# except +# .gitignore +# !cassandra.yaml +# !jvm* +# !logback.xml +# !commitlog_archiving.properties +# !cassandra-rackdc.properties +# !cassandra-env.sh \ No newline at end of file diff --git a/etc/cass1/cassandra-env.sh b/etc/cass1/cassandra-env.sh new file mode 100644 index 0000000..2e3c8c9 --- /dev/null +++ b/etc/cass1/cassandra-env.sh @@ -0,0 +1,307 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +calculate_heap_sizes() +{ + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi +} + +# Sets the path where logback and GC logs are written. +if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" +fi + +#GC log path has to be defined here because it needs to access CASSANDRA_HOME +if [ $JAVA_VERSION -ge 11 ] ; then + # See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax + # The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M + echo "$JVM_OPTS" | grep -qe "-[X]log:gc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760" + fi +else + # Java 8 + echo "$JVM_OPTS" | grep -qe "-[X]loggc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + fi +fi + +# Check what parameters were defined on jvm-server.options file to avoid conflicts +echo $JVM_OPTS | grep -q Xmn +DEFINED_XMN=$? +echo $JVM_OPTS | grep -q Xmx +DEFINED_XMX=$? +echo $JVM_OPTS | grep -q Xms +DEFINED_XMS=$? +echo $JVM_OPTS | grep -q UseConcMarkSweepGC +USING_CMS=$? +echo $JVM_OPTS | grep -q +UseG1GC +USING_G1=$? + +# Override these to set the amount of memory to allocate to the JVM at +# start-up. For production use you may wish to adjust this for your +# environment. MAX_HEAP_SIZE is the total amount of memory dedicated +# to the Java heap. HEAP_NEWSIZE refers to the size of the young +# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set +# or not (if you set one, set the other). +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause +# times. If in doubt, and if you do not particularly want to tweak, go with +# 100 MB per physical CPU core. + +#MAX_HEAP_SIZE="4G" +#HEAP_NEWSIZE="800M" + +# Set this to control the amount of arenas per-thread in glibc +#export MALLOC_ARENA_MAX=4 + +# only calculate the size if it's not set manually +if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes +elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 +fi + +if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 +fi + +# We only set -Xms and -Xmx if they were not defined on jvm-server.options file +# If defined, both Xmx and Xms should be defined together. +if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" +elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file." + exit 1 +fi + +# We only set -Xmn flag if it was not defined in jvm-server.options file +# and if the CMS GC is being used +# If defined, both Xmn and Xmx should be defined together. +if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file." + exit 1 +elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" +fi + +# We fail to start if -Xmn is used with G1 GC is being used +# See comments for -Xmn in jvm-server.options +if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then + echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details." + exit 1 +fi + +if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" +fi + +# provides hints to the JIT compiler +JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + +# add the jamm javaagent +JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar" + +# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR +if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" +fi + +# stop the jvm on OutOfMemoryError as it can result in some data corruption +# uncomment the preferred option +# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 +# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words +# on white spaces without taking quotes into account +# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" +# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" +JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + +# print an heap histogram on OutOfMemoryError +# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + +# jmx: metrics and administration interface +# +# add this if you're having trouble connecting: +# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" +# +# see +# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole +# for more on configuring JMX through firewalls, etc. (Short version: +# get it working with no firewall first.) +# +# Cassandra ships with JMX accessible *only* from localhost. +# To enable remote JMX connections, uncomment lines below +# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity +# +if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes +fi + +# Specifies the default port over which Cassandra will be available for +# JMX connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +JMX_PORT="7199" + +if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" +else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + # if ssl is enabled the same port cannot be used for both jmx and rmi so either + # pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins) + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + + # turn on JMX authentication. See below for further options + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true" + + # jmx ssl options + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=" +fi + +# jmx authentication and authorization options. By default, auth is only +# activated for remote connections but they can also be enabled for local only JMX +## Basic file based authn & authz +JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" +#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" +## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. +## JAAS login modules can be used for authentication by uncommenting these two properties. +## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - +## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration +## file cassandra-jaas.config +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" +#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + +## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, +## uncomment this to use it. Requires one of the two authentication options to be enabled +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + +# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ +# directory. +# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx +# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines +# to control its listen address and port. +#MX4J_ADDRESS="127.0.0.1" +#MX4J_PORT="8081" + +# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 +# for SIGAR we have to set the java.library.path +# to the location of the native libraries. +JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + +if [ "x$MX4J_ADDRESS" != "x" ]; then + if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + else + JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS" + fi +fi +if [ "x$MX4J_PORT" != "x" ]; then + if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + else + JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT" + fi +fi + +JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" + diff --git a/etc/cass1/cassandra-rackdc.properties b/etc/cass1/cassandra-rackdc.properties new file mode 100644 index 0000000..84716f5 --- /dev/null +++ b/etc/cass1/cassandra-rackdc.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These properties are used with GossipingPropertyFileSnitch and will +# indicate the rack and dc for this node +dc= Mars +rack= West + +# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch +# to append a string to the EC2 region name. +#dc_suffix= + +# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does. +# prefer_local=true + +# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch. +# Options are: +# legacy : datacenter name is the part of the availability zone name preceding the last "-" +# when the zone ends in -1 and includes the number if not -1. Rack is the portion of +# the availability zone name following the last "-". +# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b; +# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER +# standard : Default value. datacenter name is the standard AWS region name, including the number. +# rack name is the region plus the availability zone letter. +# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b; +# ec2_naming_scheme=standard diff --git a/etc/cass1/cassandra.yaml b/etc/cass1/cassandra.yaml new file mode 100644 index 0000000..4c037b9 --- /dev/null +++ b/etc/cass1/cassandra.yaml @@ -0,0 +1,1419 @@ + +# Cassandra storage config YAML + +# NOTE: +# See https://cassandra.apache.org/doc/latest/configuration/ for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: SolarSystem + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for +# best practice information about num_tokens. +# +num_tokens: 128 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replica factor. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. + +# Replica factor is determined via the replication strategy used by the specified +# keyspace. +# allocate_tokens_for_keyspace: KEYSPACE + +# Replica factor is explicitly set, regardless of keyspace or datacenter. +# This is the replica factor within the datacenter, like NTS. +allocate_tokens_for_local_replication_factor: 3 + +# initial_token allows you to specify tokens manually. While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) +hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: AllowAllAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: AllowAllAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. +# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Network authorization backend, implementing INetworkAuthorizer; used to restrict user +# access to certain DCs +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer, +# CassandraNetworkAuthorizer}. +# +# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization. +# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +network_authorizer: AllowAllNetworkAuthorizer + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. The partitioner can NOT be +# changed without reloading all data. If you are adding nodes or upgrading, +# you should set this to the same partitioner that you are currently using. +# +# The default partitioner is the Murmur3Partitioner. Older partitioners +# such as the RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner have been included for backward compatibility only. +# For new clusters, you should NOT change this value. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. If multiple +# directories are specified, Cassandra will spread data evenly across +# them by partitioning the token ranges. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +# data_file_directories: +# - /var/lib/cassandra/data + +# Directory were Cassandra should store the data of the local system keyspaces. +# By default Cassandra will store the data of the local system keyspaces in the first of the data directories specified +# by data_file_directories. +# This approach ensures that if one of the other disks is lost Cassandra can continue to operate. For extra security +# this setting allows to store those data on a different directory that provides redundancy. +# local_system_data_file_directory: + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +# commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. If not set, the default directory is +# $CASSANDRA_HOME/data/cdc_raw. +# cdc_raw_directory: /var/lib/cassandra/cdc_raw + +# Policy for data disk failures: +# +# die +# shut down gossip and client transports and kill the JVM for any fs errors or +# single-sstable errors, so the node can be replaced. +# +# stop_paranoid +# shut down gossip and client transports even for single-sstable errors, +# kill the JVM for errors during startup. +# +# stop +# shut down gossip and client transports, leaving the node effectively dead, but +# can still be inspected via JMX, kill the JVM for errors during startup. +# +# best_effort +# stop using the failed disk and respond to requests based on +# remaining available sstables. This means you WILL see obsolete +# data at CL.ONE! +# +# ignore +# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra +disk_failure_policy: stop + +# Policy for commit disk failures: +# +# die +# shut down the node and kill the JVM, so the node can be replaced. +# +# stop +# shut down the node, leaving the node effectively dead, but +# can still be inspected via JMX. +# +# stop_commit +# shutdown the commit log, letting writes collect but +# continuing to service reads, as in pre-2.0.5 Cassandra +# +# ignore +# ignore fatal errors and let the batches fail +commit_failure_policy: stop + +# Maximum size of the native protocol prepared statement cache +# +# Valid values are either "auto" (omitting the value) or a value greater 0. +# +# Note that specifying a too large value will result in long running GCs and possbily +# out-of-memory errors. Keep the value at a small fraction of the heap. +# +# If you constantly see "prepared statements discarded in the last minute because +# cache limit reached" messages, the first step is to investigate the root cause +# of these messages and check whether prepared statements are used correctly - +# i.e. use bind markers for variable parts. +# +# Do only change the default value, if you really have more prepared statements than +# fit in the cache. In most cases it is not neccessary to change this value. +# Constantly re-preparing statements is a performance penalty. +# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +prepared_statements_cache_size_mb: + +# Maximum size of the key cache in memory. +# +# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the +# minimum, sometimes more. The key cache is fairly tiny for the amount of +# time it saves, so it's worthwhile to use it at large numbers. +# The row cache saves even more time, but must contain the entire row, +# so it is extremely space-intensive. It's best to only use the +# row cache if you have hot rows or static rows. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. +key_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the key cache. Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 14400 or 4 hours. +key_cache_save_period: 14400 + +# Number of keys from the key cache to save +# Disabled by default, meaning all keys are going to be saved +# key_cache_keys_to_save: 100 + +# Row cache implementation class name. Available implementations: +# +# org.apache.cassandra.cache.OHCProvider +# Fully off-heap row cache implementation (default). +# +# org.apache.cassandra.cache.SerializingCacheProvider +# This is the row cache implementation availabile +# in previous releases of Cassandra. +# row_cache_class_name: org.apache.cassandra.cache.OHCProvider + +# Maximum size of the row cache in memory. +# Please note that OHC cache implementation requires some additional off-heap memory to manage +# the map structures and some in-flight memory during operations before/after cache entries can be +# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. +# Do not specify more memory that the system can afford in the worst usual situation and leave some +# headroom for OS block level cache. Do never allow your system to swap. +# +# Default value is 0, to disable row caching. +row_cache_size_in_mb: 0 + +# Duration in seconds after which Cassandra should save the row cache. +# Caches are saved to saved_caches_directory as specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 0 to disable saving the row cache. +row_cache_save_period: 0 + +# Number of keys from the row cache to save. +# Specify 0 (which is the default), meaning all keys are going to be saved +# row_cache_keys_to_save: 100 + +# Maximum size of the counter cache in memory. +# +# Counter cache helps to reduce counter locks' contention for hot counter cells. +# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before +# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration +# of the lock hold, helping with hot counter cell updates, but will not allow skipping +# the read entirely. Only the local (clock, count) tuple of a counter cell is kept +# in memory, not the whole counter, so it's relatively cheap. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. +# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. +counter_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the counter cache (keys only). Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Default is 7200 or 2 hours. +counter_cache_save_period: 7200 + +# Number of keys from the counter cache to save +# Disabled by default, meaning all keys are going to be saved +# counter_cache_keys_to_save: 100 + +# saved caches +# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. +# saved_caches_directory: /var/lib/cassandra/saved_caches + +# Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting +# the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup +# while still having the cache during runtime. +# cache_load_timeout_seconds: 30 + +# commitlog_sync may be either "periodic", "group", or "batch." +# +# When in batch mode, Cassandra won't ack writes until the commit log +# has been flushed to disk. Each incoming write will trigger the flush task. +# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had +# almost no value, and is being removed. +# +# commitlog_sync_batch_window_in_ms: 2 +# +# group mode is similar to batch mode, where Cassandra will not ack writes +# until the commit log has been flushed to disk. The difference is group +# mode will wait up to commitlog_sync_group_window_in_ms between flushes. +# +# commitlog_sync_group_window_in_ms: 1000 +# +# the default option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync: periodic +commitlog_sync_period_in_ms: 10000 + +# When in periodic commitlog mode, the number of milliseconds to block writes +# while waiting for a slow disk flush to complete. +# periodic_commitlog_sync_lag_block_in_ms: + +# The size of the individual commitlog file segments. A commitlog +# segment may be archived, deleted, or recycled once all the data +# in it (potentially from each columnfamily in the system) has been +# flushed to sstables. +# +# The default size is 32, which is almost always fine, but if you are +# archiving commitlog segments (see commitlog_archiving.properties), +# then you probably want a finer granularity of archiving; 8 or 16 MB +# is reasonable. +# Max mutation size is also configurable via max_mutation_size_in_kb setting in +# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. +# This should be positive and less than 2048. +# +# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must +# be set to at least twice the size of max_mutation_size_in_kb / 1024 +# +commitlog_segment_size_in_mb: 32 + +# Compression to apply to the commit log. If omitted, the commit log +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +# commitlog_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Compression to apply to SSTables as they flush for compressed tables. +# Note that tables without compression enabled do not respect this flag. +# +# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially +# block flushes for too long, the default is to flush with a known fast +# compressor in those cases. Options are: +# +# none : Flush without compressing blocks but while still doing checksums. +# fast : Flush with a fast compressor. If the table is already using a +# fast compressor that compressor is used. +# table: Always flush with the same compressor that the table uses. This +# was the pre 4.0 behavior. +# +# flush_compression: fast + +# any class that implements the SeedProvider interface and has a +# constructor that takes a Map of parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "cass1,cass2" + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. Same applies to +# "concurrent_counter_writes", since counter writes read the current +# values before incrementing and writing them back. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 + +# For materialized view writes, as there is a read involved, so this should +# be limited by the less of concurrent reads or concurrent writes. +concurrent_materialized_view_writes: 32 + +# Maximum memory to use for inter-node and client-server networking buffers. +# +# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# networking_cache_size_in_mb: 128 + +# Enable the sstable chunk cache. The chunk cache will store recently accessed +# sections of the sstable in-memory as uncompressed buffers. +# file_cache_enabled: false + +# Maximum memory to use for sstable chunk cache and buffer pooling. +# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache +# that holds uncompressed sstable chunks. +# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# file_cache_size_in_mb: 512 + +# Flag indicating whether to allocate on or off heap when the sstable buffer +# pool is exhausted, that is when it has exceeded the maximum memory +# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + +# buffer_pool_use_heap_if_exhausted: true + +# The strategy for optimizing disk read +# Possible values are: +# ssd (for solid state disks, the default) +# spinning (for spinning disks) +# disk_optimization_strategy: ssd + +# Total permitted memory to use for memtables. Cassandra will stop +# accepting writes when the limit is exceeded until a flush completes, +# and will trigger a flush based on memtable_cleanup_threshold +# If omitted, Cassandra will set both to 1/4 the size of the heap. +# memtable_heap_space_in_mb: 2048 +# memtable_offheap_space_in_mb: 2048 + +# memtable_cleanup_threshold is deprecated. The default calculation +# is the only reasonable choice. See the comments on memtable_flush_writers +# for more information. +# +# Ratio of occupied non-flushing memtable size to total permitted size +# that will trigger a flush of the largest memtable. Larger mct will +# mean larger flushes and hence less compaction, but also less concurrent +# flush activity which can make it difficult to keep your disks fed +# under heavy write load. +# +# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) +# memtable_cleanup_threshold: 0.11 + +# Specify the way Cassandra allocates and manages memtable memory. +# Options are: +# +# heap_buffers +# on heap nio buffers +# +# offheap_buffers +# off heap (direct) nio buffers +# +# offheap_objects +# off heap objects +memtable_allocation_type: heap_buffers + +# Limit memory usage for Merkle tree calculations during repairs. The default +# is 1/16th of the available heap. The main tradeoff is that smaller trees +# have less resolution, which can lead to over-streaming data. If you see heap +# pressure during repairs, consider lowering this, but you cannot go below +# one megabyte. If you see lots of over-streaming, consider raising +# this or using subrange repair. +# +# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. +# +# repair_session_space_in_mb: + +# Total space to use for commit logs on disk. +# +# If space gets above this value, Cassandra will flush every dirty CF +# in the oldest segment and remove it. So a small total commitlog space +# will tend to cause more flush activity on less-active columnfamilies. +# +# The default value is the smaller of 8192, and 1/4 of the total space +# of the commitlog volume. +# +# commitlog_total_space_in_mb: 8192 + +# This sets the number of memtable flush writer threads per disk +# as well as the total number of memtables that can be flushed concurrently. +# These are generally a combination of compute and IO bound. +# +# Memtable flushing is more CPU efficient than memtable ingest and a single thread +# can keep up with the ingest rate of a whole server on a single fast disk +# until it temporarily becomes IO bound under contention typically with compaction. +# At that point you need multiple flush threads. At some point in the future +# it may become CPU bound all the time. +# +# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation +# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing +# to free memory. +# +# memtable_flush_writers defaults to two for a single data directory. +# This means that two memtables can be flushed concurrently to the single data directory. +# If you have multiple data directories the default is one memtable flushing at a time +# but the flush will use a thread per data directory so you will get two or more writers. +# +# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. +# Adding more flush writers will result in smaller more frequent flushes that introduce more +# compaction overhead. +# +# There is a direct tradeoff between number of memtables that can be flushed concurrently +# and flush size and frequency. More is not better you just need enough flush writers +# to never stall waiting for flushing to free memory. +# +#memtable_flush_writers: 2 + +# Total space to use for change-data-capture logs on disk. +# +# If space gets above this value, Cassandra will throw WriteTimeoutException +# on Mutations including tables with CDC enabled. A CDCCompactor is responsible +# for parsing the raw CDC logs and deleting them when parsing is completed. +# +# The default value is the min of 4096 mb and 1/8th of the total space +# of the drive where cdc_raw_directory resides. +# cdc_total_space_in_mb: 4096 + +# When we hit our cdc_raw limit and the CDCCompactor is either running behind +# or experiencing backpressure, we check at the following interval to see if any +# new space for cdc-tracked tables has been made available. Default to 250ms +# cdc_free_space_check_interval_ms: 250 + +# A fixed memory pool size in MB for for SSTable index summaries. If left +# empty, this will default to 5% of the heap size. If the memory usage of +# all index summaries exceeds this limit, SSTables with low read rates will +# shrink their index summaries in order to meet this limit. However, this +# is a best-effort process. In extreme conditions Cassandra may need to use +# more than this amount of memory. +index_summary_capacity_in_mb: + +# How frequently index summaries should be resampled. This is done +# periodically to redistribute memory from the fixed-size pool to sstables +# proportional their recent read rates. Setting to -1 will disable this +# process, leaving existing index summaries at their current sampling level. +index_summary_resize_interval_in_minutes: 60 + +# Whether to, when doing sequential writing, fsync() at intervals in +# order to force the operating system to flush the dirty +# buffers. Enable this to avoid sudden dirty buffer flushing from +# impacting read latencies. Almost always a good idea on SSDs; not +# necessarily on platters. +trickle_fsync: false +trickle_fsync_interval_in_kb: 10240 + +# TCP port, for commands and data +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +storage_port: 7000 + +# SSL port, for legacy encrypted communication. This property is unused unless enabled in +# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated +# as a single port can be used for either/both secure and insecure connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +ssl_storage_port: 7001 + +# Address or interface to bind to and tell other Cassandra nodes to connect to. +# You _must_ change this if you want multiple nodes to be able to communicate! +# +# Set listen_address OR listen_interface, not both. +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing _if_ the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). If unresolvable +# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems. +# +# Setting listen_address to 0.0.0.0 is always wrong. +# +listen_address: 172.20.0.6 + +# Set listen_address OR listen_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# listen_interface: eth0 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# listen_interface_prefer_ipv6: false + +# Address to broadcast to other Cassandra nodes +# Leaving this blank will set it to the same value as listen_address +broadcast_address: 172.20.0.6 + +# When using multiple physical network interfaces, set this +# to true to listen on broadcast_address in addition to +# the listen_address, allowing nodes to communicate in both +# interfaces. +# Ignore this property if the network configuration automatically +# routes between the public and private networks such as EC2. +# listen_on_broadcast_address: false + +# Internode authentication backend, implementing IInternodeAuthenticator; +# used to allow/disallow connections from peer nodes. +# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + +# Whether to start the native transport server. +# The address on which the native transport is bound is defined by rpc_address. +start_native_transport: true +# port for the CQL native transport to listen for clients on +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +native_transport_port: 9042 +# Enabling native transport encryption in client_encryption_options allows you to either use +# encryption for the standard port or to use a dedicated, additional port along with the unencrypted +# standard native_transport_port. +# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption +# for native_transport_port. Setting native_transport_port_ssl to a different value +# from native_transport_port will use encryption for native_transport_port_ssl while +# keeping native_transport_port unencrypted. +# native_transport_port_ssl: 9142 +# The maximum threads for handling requests (note that idle threads are stopped +# after 30 seconds so there is not corresponding minimum setting). +# native_transport_max_threads: 128 +# +# The maximum size of allowed frame. Frame (requests) larger than this will +# be rejected as invalid. The default is 256MB. If you're changing this parameter, +# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. +# native_transport_max_frame_size_in_mb: 256 + +# The maximum number of concurrent client connections. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections: -1 + +# The maximum number of concurrent client connections per source ip. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections_per_ip: -1 + +# Controls whether Cassandra honors older, yet currently supported, protocol versions. +# The default is true, which means all supported protocols will be honored. +native_transport_allow_older_protocols: true + +# Controls when idle client connections are closed. Idle connections are ones that had neither reads +# nor writes for a time period. +# +# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which +# will reset idle timeout timer on the server side. To close idle client connections, corresponding +# values for heartbeat intervals have to be set on the client side. +# +# Idle connection timeouts are disabled by default. +# native_transport_idle_timeout_in_ms: 60000 + +# The address or interface to bind the native transport server to. +# +# Set rpc_address OR rpc_interface, not both. +# +# Leaving rpc_address blank has the same effect as on listen_address +# (i.e. it will be based on the configured hostname of the node). +# +# Note that unlike listen_address, you can specify 0.0.0.0, but you must also +# set broadcast_rpc_address to a value other than 0.0.0.0. +# +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +rpc_address: 0.0.0.0 + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +broadcast_rpc_address: 172.20.0.6 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# See also: +# /proc/sys/net/core/wmem_max +# /proc/sys/net/core/rmem_max +# /proc/sys/net/ipv4/tcp_wmem +# /proc/sys/net/ipv4/tcp_wmem +# and 'man tcp' +# internode_socket_send_buffer_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# internode_socket_receive_buffer_size_in_bytes: + +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# Whether or not a snapshot is taken of the data before keyspace truncation +# or dropping of column families. The STRONGLY advised default of true +# should be used to provide data safety. If you set this flag to false, you will +# lose data on truncation or drop. +auto_snapshot: true + +# The act of creating or clearing a snapshot involves creating or removing +# potentially tens of thousands of links, which can cause significant performance +# impact, especially on consumer grade SSDs. A non-zero value here can +# be used to throttle these links to avoid negative performance impact of +# taking and clearing snapshots +snapshot_links_per_second: 0 + +# Granularity of the collation index of rows within a partition. +# Increase if your rows are large, or if you have a very large +# number of rows per partition. The competing goals are these: +# +# - a smaller granularity means more index entries are generated +# and looking up rows withing the partition by collation column +# is faster +# - but, Cassandra will keep the collation index in memory for hot +# rows (as part of the key cache), so a larger granularity means +# you can cache more hot rows +column_index_size_in_kb: 64 + +# Per sstable indexed key cache entries (the collation index in memory +# mentioned above) exceeding this size will not be held on heap. +# This means that only partition information is held on heap and the +# index entries are read from disk. +# +# Note that this size refers to the size of the +# serialized index information and not the size of the partition. +column_index_cache_size_in_kb: 2 + +# Number of simultaneous compactions to allow, NOT including +# validation "compactions" for anti-entropy repair. Simultaneous +# compactions can help preserve read performance in a mixed read/write +# workload, by mitigating the tendency of small sstables to accumulate +# during a single long running compactions. The default is usually +# fine and if you experience problems with compaction running too +# slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# +# concurrent_compactors defaults to the smaller of (number of disks, +# number of cores), with a minimum of 2 and a maximum of 8. +# +# If your data directories are backed by SSD, you should increase this +# to the number of cores. +#concurrent_compactors: 1 + +# Number of simultaneous repair validations to allow. If not set or set to +# a value less than 1, it defaults to the value of concurrent_compactors. +# To set a value greeater than concurrent_compactors at startup, the system +# property cassandra.allow_unlimited_concurrent_validations must be set to +# true. To dynamically resize to a value > concurrent_compactors on a running +# node, first call the bypassConcurrentValidatorsLimit method on the +# org.apache.cassandra.db:type=StorageService mbean +# concurrent_validations: 0 + +# Number of simultaneous materialized view builder tasks to allow. +concurrent_materialized_view_builders: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. Note that this accounts for all types +# of compaction, including validation compaction (building Merkle trees +# for repairs). +compaction_throughput_mb_per_sec: 64 + +# When compacting, the replacement sstable(s) can be opened before they +# are completely written, and used in place of the prior sstables for +# any range that has been written. This helps to smoothly transfer reads +# between the sstables, reducing page cache churn and keeping hot rows hot +sstable_preemptive_open_interval_in_mb: 50 + +# When enabled, permits Cassandra to zero-copy stream entire eligible +# SSTables between nodes, including every component. +# This speeds up the network transfer significantly subject to +# throttling specified by stream_throughput_outbound_megabits_per_sec. +# Enabling this will reduce the GC pressure on sending and receiving node. +# When unset, the default is enabled. While this feature tries to keep the +# disks balanced, it cannot guarantee it. This feature will be automatically +# disabled if internode encryption is enabled. +# stream_entire_sstables: true + +# Throttles all outbound streaming file transfers on this node to the +# given total throughput in Mbps. This is necessary because Cassandra does +# mostly sequential IO when streaming data during bootstrap or repair, which +# can lead to saturating the network connection and degrading rpc performance. +# When unset, the default is 200 Mbps or 25 MB/s. +# stream_throughput_outbound_megabits_per_sec: 200 + +# Throttles all streaming file transfer between the datacenters, +# this setting allows users to throttle inter dc stream throughput in addition +# to throttling all network stream traffic as configured with +# stream_throughput_outbound_megabits_per_sec +# When unset, the default is 200 Mbps or 25 MB/s +# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + +# Server side timeouts for requests. The server will return a timeout exception +# to the client if it can't complete an operation within the corresponding +# timeout. Those settings are a protection against: +# 1) having client wait on an operation that might never terminate due to some +# failures. +# 2) operations that use too much CPU/read too much data (leading to memory build +# up) by putting a limit to how long an operation will execute. +# For this reason, you should avoid putting these settings too high. In other words, +# if you are timing out requests because of underlying resource constraints then +# increasing the timeout will just cause more problems. Of course putting them too +# low is equally ill-advised since clients could get timeouts even for successful +# operations just because the timeout setting is too tight. + +# How long the coordinator should wait for read operations to complete. +# Lowest acceptable value is 10 ms. +read_request_timeout_in_ms: 50000 +# How long the coordinator should wait for seq or index scans to complete. +# Lowest acceptable value is 10 ms. +range_request_timeout_in_ms: 10000 +# How long the coordinator should wait for writes to complete. +# Lowest acceptable value is 10 ms. +write_request_timeout_in_ms: 20000 +# How long the coordinator should wait for counter writes to complete. +# Lowest acceptable value is 10 ms. +counter_write_request_timeout_in_ms: 5000 +# How long a coordinator should continue to retry a CAS operation +# that contends with other proposals for the same row. +# Lowest acceptable value is 10 ms. +cas_contention_timeout_in_ms: 1000 +# How long the coordinator should wait for truncates to complete +# (This can be much longer, because unless auto_snapshot is disabled +# we need to flush first so we can snapshot before removing the data.) +# Lowest acceptable value is 10 ms. +truncate_request_timeout_in_ms: 60000 +# The default timeout for other, miscellaneous operations. +# Lowest acceptable value is 10 ms. +request_timeout_in_ms: 100000 + +# Defensive settings for protecting Cassandra from true network partitions. +# See (CASSANDRA-14358) for details. +# +# The amount of time to wait for internode tcp connections to establish. +# internode_tcp_connect_timeout_in_ms: 2000 +# +# The amount of time unacknowledged data is allowed on a connection before we throw out the connection +# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000 +# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0 +# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8. +# internode_tcp_user_timeout_in_ms: 30000 + +# The amount of time unacknowledged data is allowed on a streaming connection. +# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout. +# internode_streaming_tcp_user_timeout_in_ms: 300000 + +# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes +# and waiting to be processed on arrival from other nodes in the cluster. These limits are applied to the on-wire +# size of the message being sent or received. +# +# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed. +# Each node-pair has three links: urgent, small and large. So any given node may have a maximum of +# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes) +# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens +# nodes should need to communicate with significant bandwidth. +# +# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit, +# on all links to or from a single node in the cluster. +# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit, +# on all links to or from any node in the cluster. +# +# internode_application_send_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB +# internode_application_receive_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB + + +# How long before a node logs slow queries. Select queries that take longer than +# this timeout to execute, will generate an aggregated log message, so that slow queries +# can be identified. Set this value to zero to disable slow query logging. +slow_query_log_timeout_in_ms: 500 + +# Enable operation timeout information exchange between nodes to accurately +# measure request timeouts. If disabled, replicas will assume that requests +# were forwarded to them instantly by the coordinator, which means that +# under overload conditions we will waste that much extra time processing +# already-timed-out requests. +# +# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync, +# since this is a requirement for general correctness of last write wins. +#cross_node_timeout: true + +# Set keep-alive period for streaming +# This node will send a keep-alive message periodically with this period. +# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fail +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# Limit number of connections per host for streaming +# Increase this when you notice that joins are CPU-bound rather that network +# bound (for example a few nodes with big files). +# streaming_connections_per_host: 1 + + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS). From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. +endpoint_snitch: GossipingPropertyFileSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 1.0 + +# Configure server-to-server internode encryption +# +# JVM and netty defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable server-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set internode_encryption= and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +server_encryption_options: + # On outbound connections, determine which type of peers to securely connect to. + # The available options are : + # none : Do not encrypt outgoing connections + # dc : Encrypt connections to peers in other datacenters but not within datacenters + # rack : Encrypt connections to peers in other racks but not within racks + # all : Always use encrypted connections + internode_encryption: none + # When set to true, encrypted and unencrypted connections are allowed on the storage_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true if internode_encryption is none + # optional: true + # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used + # during upgrade to 4.0; otherwise, set to false. + enable_legacy_ssl_storage_port: false + # Set to a valid keystore if internode_encryption is dc, rack or all + keystore: conf/.keystore + keystore_password: cassandra + # Verify peer server certificates + require_client_auth: false + # Set to a valid trustore if require_client_auth is true + truststore: conf/.truststore + truststore_password: cassandra + # Verify that the host name in the certificate matches the connected host + require_endpoint_verification: false + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# Configure client-to-server encryption. +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable client-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +client_encryption_options: + # Enable client-to-server encryption + enabled: false + # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true when enabled is false, and false when enabled is true. + # optional: true + # Set keystore and keystore_password to valid keystores if enabled is true + keystore: conf/.keystore + keystore_password: cassandra + # Verify client certificates + require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. +windows_timer_interval: 1 + + +# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from +# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by +# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys +# can still (and should!) be in the keystore and will be used on decrypt operations +# (to handle the case of key rotation). +# +# It is strongly recommended to download and install Java Cryptography Extension (JCE) +# Unlimited Strength Jurisdiction Policy Files for your version of the JDK. +# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) +# +# Currently, only the following file types are supported for transparent data encryption, although +# more are coming in future cassandra releases: commitlog, hints +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + +##################### +# SAFETY THRESHOLDS # +##################### + +# When executing a scan, within or across a partition, we need to keep the +# tombstones seen in memory so we can return them to the coordinator, which +# will use them to make sure other replicas also know about the deleted rows. +# With workloads that generate a lot of tombstones, this can cause performance +# problems and even exaust the server heap. +# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) +# Adjust the thresholds here if you understand the dangers and want to +# scan more tombstones anyway. These thresholds may also be adjusted at runtime +# using the StorageService mbean. +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a +# mechanism called replica filtering protection to ensure that results from stale replicas do +# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This +# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly +# stale results returned by the replicas, the more rows materialized during the query. +replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. +# Caution should be taken on increasing the size of this threshold as it can lead to node instability. +batch_size_warn_threshold_in_kb: 5 + +# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. +batch_size_fail_threshold_in_kb: 50 + +# Log WARN on any batches not of type LOGGED than span across more partitions than this limit +unlogged_batch_across_partitions_warn_threshold: 10 + +# Log a warning when compacting partitions larger than this value +compaction_large_partition_warning_threshold_mb: 100 + +# GC Pauses greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# gc_warn_threshold_in_ms: 1000 + +# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption +# early. Any value size larger than this threshold will result into marking an SSTable +# as corrupted. This should be positive and less than 2048. +# max_value_size_in_mb: 256 + +# Track a metric per keyspace indicating whether replication achieved the ideal consistency +# level for writes without timing out. This is different from the consistency level requested by +# each write which may be lower in order to facilitate availability. +# ideal_consistency_level: EACH_QUORUM + +# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the +# oldest non-upgraded sstable will get upgraded to the latest version +# automatic_sstable_upgrade: false +# Limit the number of concurrent sstable upgrades +# max_concurrent_automatic_sstable_upgrades: 1 + +# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs +# on audit_logging for full details about the various configuration options. +audit_logging_options: + enabled: false + logger: + - class_name: BinAuditLogger + # audit_logs_dir: + # included_keyspaces: + # excluded_keyspaces: system, system_schema, system_virtual_schema + # included_categories: + # excluded_categories: + # included_users: + # excluded_users: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + + +# default options for full query logging - these can be overridden from command line when executing +# nodetool enablefullquerylog +#full_query_logging_options: + # log_dir: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + +# validate tombstones on reads and compaction +# can be either "disabled", "warn" or "exception" +# corrupted_tombstone_strategy: disabled + +# Diagnostic Events # +# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details +# on internal state and temporal relationships across events, accessible by clients via JMX. +diagnostic_events_enabled: false + +# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in +# particular you run an old kernel or have very fewer client connections, this option might be worth evaluating. +#native_transport_flush_in_batches_legacy: false + +# Enable tracking of repaired state of data during reads and comparison between replicas +# Mismatches between the repaired sets of replicas can be characterized as either confirmed +# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair +# sessions, unrepaired partition tombstones, or some other condition means that the disparity +# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation +# as they may be indicative of corruption or data loss. +# There are separate flags for range vs partition reads as single partition reads are only tracked +# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if +# enabled for range reads, all range reads will include repaired data tracking. As this adds +# some overhead, operators may wish to disable it whilst still enabling it for partition reads +repaired_data_tracking_for_range_reads_enabled: false +repaired_data_tracking_for_partition_reads_enabled: false +# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed +# mismatches will also be recorded. This is to avoid potential signal:noise issues are unconfirmed +# mismatches are less actionable than confirmed ones. +report_unconfirmed_repaired_data_mismatches: false + +# Having many tables and/or keyspaces negatively affects performance of many operations in the +# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds +# a client warning will be sent back to the user when creating a table or keyspace. +# table_count_warn_threshold: 150 +# keyspace_count_warn_threshold: 40 + +######################### +# EXPERIMENTAL FEATURES # +######################### + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# Enables SASI index creation on this node. +# SASI indexes are considered experimental and are not recommended for production use. +enable_sasi_indexes: true + +# Enables creation of transiently replicated keyspaces on this node. +# Transient replication is experimental and is not recommended for production use. +enable_transient_replication: false + +# Enables the used of 'ALTER ... DROP COMPACT STORAGE' statements on this node. +# 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use. +enable_drop_compact_storage: false diff --git a/etc/cass1/commitlog_archiving.properties b/etc/cass1/commitlog_archiving.properties new file mode 100644 index 0000000..393259c --- /dev/null +++ b/etc/cass1/commitlog_archiving.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# commitlog archiving configuration. Leave blank to disable. + +# Command to execute to archive a commitlog segment +# Parameters: %path => Fully qualified path of the segment to archive +# %name => Name of the commit log. +# Example: archive_command=/bin/ln %path /backup/%name +# +# Limitation: *_command= expects one command with arguments. STDOUT +# and STDIN or multiple commands cannot be executed. You might want +# to script multiple commands and add a pointer here. +archive_command= + +# Command to execute to make an archived commitlog live again. +# Parameters: %from is the full path to an archived commitlog segment (from restore_directories) +# %to is the live commitlog directory +# Example: restore_command=/bin/cp -f %from %to +restore_command= + +# Directory to scan the recovery files in. +restore_directories= + +# Restore mutations created up to and including this timestamp in GMT. +# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12) +# +# Recovery will continue through the segment when the first client-supplied +# timestamp greater than this time is encountered, but only mutations less than +# or equal to this timestamp will be applied. +restore_point_in_time= + +# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...) +precision=MICROSECONDS diff --git a/etc/cass1/jvm-clients.options b/etc/cass1/jvm-clients.options new file mode 100644 index 0000000..6181ed0 --- /dev/null +++ b/etc/cass1/jvm-clients.options @@ -0,0 +1,10 @@ +########################################################################### +# jvm-clients.options # +# # +# See jvm8-clients.options and jvm11-clients.options for Java version # +# specific options. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass1/jvm-server.options b/etc/cass1/jvm-server.options new file mode 100644 index 0000000..46967f4 --- /dev/null +++ b/etc/cass1/jvm-server.options @@ -0,0 +1,188 @@ +########################################################################### +# jvm-server.options # +# # +# - all flags defined here will be used by cassandra to startup the JVM # +# - one flag should be specified per line # +# - lines that do not start with '-' will be ignored # +# - only static flags are accepted (no variables or parameters) # +# - dynamic flags will be appended to these on cassandra-env # +# # +# See jvm8-server.options and jvm11-server.options for Java version # +# specific options. # +########################################################################### + +###################### +# STARTUP PARAMETERS # +###################### + +# Uncomment any of the following properties to enable specific startup parameters + +# In a multi-instance deployment, multiple Cassandra instances will independently assume that all +# CPU processors are available to it. This setting allows you to specify a smaller set of processors +# and perhaps have affinity. +#-Dcassandra.available_processors=number_of_processors + +# The directory location of the cassandra.yaml file. +#-Dcassandra.config=directory + +# Sets the initial partitioner token for a node the first time the node is started. +#-Dcassandra.initial_token=token + +# Set to false to start Cassandra on a node but not have the node join the cluster. +#-Dcassandra.join_ring=true|false + +# Set to false to clear all gossip state for the node on restart. Use when you have changed node +# information in cassandra.yaml (such as listen_address). +#-Dcassandra.load_ring_state=true|false + +# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. +#-Dcassandra.metricsReporterConfigFile=file + +# Set the port on which the CQL native transport listens for clients. (Default: 9042) +#-Dcassandra.native_transport_port=port + +# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) +#-Dcassandra.partitioner=partitioner + +# To replace a node that has died, restart a new node in its place specifying the address of the +# dead node. The new node must not have any data in its data directory, that is, it must be in the +# same state as before bootstrapping. +#-Dcassandra.replace_address=listen_address or broadcast_address of dead node + +# Allow restoring specific tables from an archived commit log. +#-Dcassandra.replayList=table + +# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits +# before joining the ring. +#-Dcassandra.ring_delay_ms=ms + +# Set the SSL port for encrypted communication. (Default: 7001) +#-Dcassandra.ssl_storage_port=port + +# Set the port for inter-node communication. (Default: 7000) +#-Dcassandra.storage_port=port + +# Set the default location for the trigger JARs. (Default: conf/triggers) +#-Dcassandra.triggers_dir=directory + +# For testing new compaction and compression strategies. It allows you to experiment with different +# strategies and benchmark write performance differences without affecting the production workload. +#-Dcassandra.write_survey=true + +# To disable configuration via JMX of auth caches (such as those for credentials, permissions and +# roles). This will mean those config options can only be set (persistently) in cassandra.yaml +# and will require a restart for new values to take effect. +#-Dcassandra.disable_auth_caches_remote_configuration=true + +# To disable dynamic calculation of the page size used when indexing an entire partition (during +# initial index build/rebuild). If set to true, the page size will be fixed to the default of +# 10000 rows per page. +#-Dcassandra.force_default_indexing_page_size=true + +# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds +#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds + +######################## +# GENERAL JVM SETTINGS # +######################## + +# enable assertions. highly suggested for correct application functionality. +-ea + +# disable assertions for net.openhft.** because it runs out of memory by design +# if enabled and run for more than just brief testing +-da:net.openhft... + +# enable thread priorities, primarily so we can give periodic tasks +# a lower priority to avoid interfering with client workload +-XX:+UseThreadPriorities + +# Enable heap-dump if there's an OOM +-XX:+HeapDumpOnOutOfMemoryError + +# Per-thread stack size. +-Xss256k + +# Make sure all memory is faulted and zeroed on startup. +# This helps prevent soft faults in containers and makes +# transparent hugepage allocation more effective. +-XX:+AlwaysPreTouch + +# Disable biased locking as it does not benefit Cassandra. +-XX:-UseBiasedLocking + +# Enable thread-local allocation blocks and allow the JVM to automatically +# resize them at runtime. +-XX:+UseTLAB +-XX:+ResizeTLAB +-XX:+UseNUMA + +# http://www.evanjones.ca/jvm-mmap-pause.html +-XX:+PerfDisableSharedMem + +# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See +# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: +# comment out this entry to enable IPv6 support). +-Djava.net.preferIPv4Stack=true + +### Debug options + +# uncomment to enable flight recorder +#-XX:+UnlockCommercialFeatures +#-XX:+FlightRecorder + +# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 +#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 + +# uncomment to have Cassandra JVM log internal method compilation (developers only) +#-XX:+UnlockDiagnosticVMOptions +#-XX:+LogCompilation + +################# +# HEAP SETTINGS # +################# + +# Heap size is automatically calculated by cassandra-env based on this +# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) +# That is: +# - calculate 1/2 ram and cap to 1024MB +# - calculate 1/4 ram and cap to 8192MB +# - pick the max +# +# For production use you may wish to adjust this for your environment. +# If that's the case, uncomment the -Xmx and Xms options below to override the +# automatic calculation of JVM heap memory. +# +# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to +# the same value to avoid stop-the-world GC pauses during resize, and +# so that we can lock the heap in memory on startup to prevent any +# of it from being swapped out. +#-Xms4G +#-Xmx4G + +# Young generation size is automatically calculated by cassandra-env +# based on this formula: min(100 * num_cores, 1/4 * heap size) +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# It is not recommended to set the young generation size if using the +# G1 GC, since that will override the target pause-time goal. +# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html +# +# The example below assumes a modern 8-core+ machine for decent +# times. If in doubt, and if you do not particularly want to tweak, go +# 100 MB per physical CPU core. +#-Xmn800M + +################################### +# EXPIRATION DATE OVERFLOW POLICY # +################################### + +# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date: +# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00. +# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. +# * CAP_NOWARN: same as previous, except that the client warning will not be emitted. +# +#-Dcassandra.expiration_date_overflow_policy=REJECT diff --git a/etc/cass1/jvm11-clients.options b/etc/cass1/jvm11-clients.options new file mode 100644 index 0000000..c88b7ab --- /dev/null +++ b/etc/cass1/jvm11-clients.options @@ -0,0 +1,29 @@ +########################################################################### +# jvm11-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 11 and newer. # +########################################################################### + +################### +# JPMS SETTINGS # +################### + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + +# The newline in the end of file is intentional diff --git a/etc/cass1/jvm11-server.options b/etc/cass1/jvm11-server.options new file mode 100644 index 0000000..7e78467 --- /dev/null +++ b/etc/cass1/jvm11-server.options @@ -0,0 +1,103 @@ +########################################################################### +# jvm11-server.options # +# # +# See jvm-server.options. This file is specific for Java 11 and newer. # +########################################################################### + +################# +# GC SETTINGS # +################# + + + +### CMS Settings +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + + + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + + +### JPMS + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + + +### GC logging options -- uncomment to enable + +# Java 11 (and newer) GC logging options: +# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax +# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M +#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 + +# Notes for Java 8 migration: +# +# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc" +# -XX:+PrintGCDateStamps maps to decorator 'time' +# +# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' +# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' +# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' +# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' +# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' + +### Netty Options + +# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable +# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has +# inferior performance and risks exceeding MaxDirectMemory +-Dio.netty.tryReflectionSetAccessible=true + +# The newline in the end of file is intentional diff --git a/etc/cass1/jvm8-clients.options b/etc/cass1/jvm8-clients.options new file mode 100644 index 0000000..7d1b2ef --- /dev/null +++ b/etc/cass1/jvm8-clients.options @@ -0,0 +1,9 @@ +########################################################################### +# jvm8-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 8 and newer. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass1/jvm8-server.options b/etc/cass1/jvm8-server.options new file mode 100644 index 0000000..6214669 --- /dev/null +++ b/etc/cass1/jvm8-server.options @@ -0,0 +1,76 @@ +########################################################################### +# jvm8-server.options # +# # +# See jvm-server.options. This file is specific for Java 8 and newer. # +########################################################################### + +######################## +# GENERAL JVM SETTINGS # +######################## + +# allows lowering thread priority without being root on linux - probably +# not necessary on Windows but doesn't harm anything. +# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html +-XX:ThreadPriorityPolicy=42 + +################# +# GC SETTINGS # +################# + +### CMS Settings +-XX:+UseParNewGC +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + +### GC logging options -- uncomment to enable + +-XX:+PrintGCDetails +-XX:+PrintGCDateStamps +-XX:+PrintHeapAtGC +-XX:+PrintTenuringDistribution +-XX:+PrintGCApplicationStoppedTime +-XX:+PrintPromotionFailure +#-XX:PrintFLSStatistics=1 +#-Xloggc:/var/log/cassandra/gc.log +-XX:+UseGCLogFileRotation +-XX:NumberOfGCLogFiles=10 +-XX:GCLogFileSize=10M + +# The newline in the end of file is intentional diff --git a/etc/cass1/logback.xml b/etc/cass1/logback.xml new file mode 100644 index 0000000..e98fea4 --- /dev/null +++ b/etc/cass1/logback.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + INFO + + ${cassandra.logdir}/system.log + + + ${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + ${cassandra.logdir}/debug.log + + + ${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + 1024 + 0 + true + + + + + + + + INFO + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/cass2/.gitignore b/etc/cass2/.gitignore new file mode 100644 index 0000000..e81e4e0 --- /dev/null +++ b/etc/cass2/.gitignore @@ -0,0 +1,11 @@ +# ignore everything +#* + +# except +# .gitignore +# !cassandra.yaml +# !jvm* +# !logback.xml +# !commitlog_archiving.properties +# !cassandra-rackdc.properties +# !cassandra-env.sh \ No newline at end of file diff --git a/etc/cass2/cassandra-env.sh b/etc/cass2/cassandra-env.sh new file mode 100644 index 0000000..2e3c8c9 --- /dev/null +++ b/etc/cass2/cassandra-env.sh @@ -0,0 +1,307 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +calculate_heap_sizes() +{ + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi +} + +# Sets the path where logback and GC logs are written. +if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" +fi + +#GC log path has to be defined here because it needs to access CASSANDRA_HOME +if [ $JAVA_VERSION -ge 11 ] ; then + # See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax + # The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M + echo "$JVM_OPTS" | grep -qe "-[X]log:gc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760" + fi +else + # Java 8 + echo "$JVM_OPTS" | grep -qe "-[X]loggc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + fi +fi + +# Check what parameters were defined on jvm-server.options file to avoid conflicts +echo $JVM_OPTS | grep -q Xmn +DEFINED_XMN=$? +echo $JVM_OPTS | grep -q Xmx +DEFINED_XMX=$? +echo $JVM_OPTS | grep -q Xms +DEFINED_XMS=$? +echo $JVM_OPTS | grep -q UseConcMarkSweepGC +USING_CMS=$? +echo $JVM_OPTS | grep -q +UseG1GC +USING_G1=$? + +# Override these to set the amount of memory to allocate to the JVM at +# start-up. For production use you may wish to adjust this for your +# environment. MAX_HEAP_SIZE is the total amount of memory dedicated +# to the Java heap. HEAP_NEWSIZE refers to the size of the young +# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set +# or not (if you set one, set the other). +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause +# times. If in doubt, and if you do not particularly want to tweak, go with +# 100 MB per physical CPU core. + +#MAX_HEAP_SIZE="4G" +#HEAP_NEWSIZE="800M" + +# Set this to control the amount of arenas per-thread in glibc +#export MALLOC_ARENA_MAX=4 + +# only calculate the size if it's not set manually +if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes +elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 +fi + +if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 +fi + +# We only set -Xms and -Xmx if they were not defined on jvm-server.options file +# If defined, both Xmx and Xms should be defined together. +if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" +elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file." + exit 1 +fi + +# We only set -Xmn flag if it was not defined in jvm-server.options file +# and if the CMS GC is being used +# If defined, both Xmn and Xmx should be defined together. +if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file." + exit 1 +elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" +fi + +# We fail to start if -Xmn is used with G1 GC is being used +# See comments for -Xmn in jvm-server.options +if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then + echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details." + exit 1 +fi + +if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" +fi + +# provides hints to the JIT compiler +JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + +# add the jamm javaagent +JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar" + +# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR +if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" +fi + +# stop the jvm on OutOfMemoryError as it can result in some data corruption +# uncomment the preferred option +# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 +# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words +# on white spaces without taking quotes into account +# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" +# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" +JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + +# print an heap histogram on OutOfMemoryError +# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + +# jmx: metrics and administration interface +# +# add this if you're having trouble connecting: +# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" +# +# see +# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole +# for more on configuring JMX through firewalls, etc. (Short version: +# get it working with no firewall first.) +# +# Cassandra ships with JMX accessible *only* from localhost. +# To enable remote JMX connections, uncomment lines below +# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity +# +if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes +fi + +# Specifies the default port over which Cassandra will be available for +# JMX connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +JMX_PORT="7199" + +if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" +else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + # if ssl is enabled the same port cannot be used for both jmx and rmi so either + # pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins) + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + + # turn on JMX authentication. See below for further options + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true" + + # jmx ssl options + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=" +fi + +# jmx authentication and authorization options. By default, auth is only +# activated for remote connections but they can also be enabled for local only JMX +## Basic file based authn & authz +JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" +#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" +## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. +## JAAS login modules can be used for authentication by uncommenting these two properties. +## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - +## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration +## file cassandra-jaas.config +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" +#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + +## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, +## uncomment this to use it. Requires one of the two authentication options to be enabled +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + +# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ +# directory. +# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx +# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines +# to control its listen address and port. +#MX4J_ADDRESS="127.0.0.1" +#MX4J_PORT="8081" + +# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 +# for SIGAR we have to set the java.library.path +# to the location of the native libraries. +JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + +if [ "x$MX4J_ADDRESS" != "x" ]; then + if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + else + JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS" + fi +fi +if [ "x$MX4J_PORT" != "x" ]; then + if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + else + JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT" + fi +fi + +JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" + diff --git a/etc/cass2/cassandra-rackdc.properties b/etc/cass2/cassandra-rackdc.properties new file mode 100644 index 0000000..84716f5 --- /dev/null +++ b/etc/cass2/cassandra-rackdc.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These properties are used with GossipingPropertyFileSnitch and will +# indicate the rack and dc for this node +dc= Mars +rack= West + +# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch +# to append a string to the EC2 region name. +#dc_suffix= + +# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does. +# prefer_local=true + +# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch. +# Options are: +# legacy : datacenter name is the part of the availability zone name preceding the last "-" +# when the zone ends in -1 and includes the number if not -1. Rack is the portion of +# the availability zone name following the last "-". +# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b; +# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER +# standard : Default value. datacenter name is the standard AWS region name, including the number. +# rack name is the region plus the availability zone letter. +# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b; +# ec2_naming_scheme=standard diff --git a/etc/cass2/cassandra.yaml b/etc/cass2/cassandra.yaml new file mode 100644 index 0000000..3037ae5 --- /dev/null +++ b/etc/cass2/cassandra.yaml @@ -0,0 +1,1419 @@ + +# Cassandra storage config YAML + +# NOTE: +# See https://cassandra.apache.org/doc/latest/configuration/ for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: SolarSystem + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for +# best practice information about num_tokens. +# +num_tokens: 128 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replica factor. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. + +# Replica factor is determined via the replication strategy used by the specified +# keyspace. +# allocate_tokens_for_keyspace: KEYSPACE + +# Replica factor is explicitly set, regardless of keyspace or datacenter. +# This is the replica factor within the datacenter, like NTS. +allocate_tokens_for_local_replication_factor: 3 + +# initial_token allows you to specify tokens manually. While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) +hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: AllowAllAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: AllowAllAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. +# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Network authorization backend, implementing INetworkAuthorizer; used to restrict user +# access to certain DCs +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer, +# CassandraNetworkAuthorizer}. +# +# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization. +# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +network_authorizer: AllowAllNetworkAuthorizer + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. The partitioner can NOT be +# changed without reloading all data. If you are adding nodes or upgrading, +# you should set this to the same partitioner that you are currently using. +# +# The default partitioner is the Murmur3Partitioner. Older partitioners +# such as the RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner have been included for backward compatibility only. +# For new clusters, you should NOT change this value. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. If multiple +# directories are specified, Cassandra will spread data evenly across +# them by partitioning the token ranges. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +# data_file_directories: +# - /var/lib/cassandra/data + +# Directory were Cassandra should store the data of the local system keyspaces. +# By default Cassandra will store the data of the local system keyspaces in the first of the data directories specified +# by data_file_directories. +# This approach ensures that if one of the other disks is lost Cassandra can continue to operate. For extra security +# this setting allows to store those data on a different directory that provides redundancy. +# local_system_data_file_directory: + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +# commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. If not set, the default directory is +# $CASSANDRA_HOME/data/cdc_raw. +# cdc_raw_directory: /var/lib/cassandra/cdc_raw + +# Policy for data disk failures: +# +# die +# shut down gossip and client transports and kill the JVM for any fs errors or +# single-sstable errors, so the node can be replaced. +# +# stop_paranoid +# shut down gossip and client transports even for single-sstable errors, +# kill the JVM for errors during startup. +# +# stop +# shut down gossip and client transports, leaving the node effectively dead, but +# can still be inspected via JMX, kill the JVM for errors during startup. +# +# best_effort +# stop using the failed disk and respond to requests based on +# remaining available sstables. This means you WILL see obsolete +# data at CL.ONE! +# +# ignore +# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra +disk_failure_policy: stop + +# Policy for commit disk failures: +# +# die +# shut down the node and kill the JVM, so the node can be replaced. +# +# stop +# shut down the node, leaving the node effectively dead, but +# can still be inspected via JMX. +# +# stop_commit +# shutdown the commit log, letting writes collect but +# continuing to service reads, as in pre-2.0.5 Cassandra +# +# ignore +# ignore fatal errors and let the batches fail +commit_failure_policy: stop + +# Maximum size of the native protocol prepared statement cache +# +# Valid values are either "auto" (omitting the value) or a value greater 0. +# +# Note that specifying a too large value will result in long running GCs and possbily +# out-of-memory errors. Keep the value at a small fraction of the heap. +# +# If you constantly see "prepared statements discarded in the last minute because +# cache limit reached" messages, the first step is to investigate the root cause +# of these messages and check whether prepared statements are used correctly - +# i.e. use bind markers for variable parts. +# +# Do only change the default value, if you really have more prepared statements than +# fit in the cache. In most cases it is not neccessary to change this value. +# Constantly re-preparing statements is a performance penalty. +# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +prepared_statements_cache_size_mb: + +# Maximum size of the key cache in memory. +# +# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the +# minimum, sometimes more. The key cache is fairly tiny for the amount of +# time it saves, so it's worthwhile to use it at large numbers. +# The row cache saves even more time, but must contain the entire row, +# so it is extremely space-intensive. It's best to only use the +# row cache if you have hot rows or static rows. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. +key_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the key cache. Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 14400 or 4 hours. +key_cache_save_period: 14400 + +# Number of keys from the key cache to save +# Disabled by default, meaning all keys are going to be saved +# key_cache_keys_to_save: 100 + +# Row cache implementation class name. Available implementations: +# +# org.apache.cassandra.cache.OHCProvider +# Fully off-heap row cache implementation (default). +# +# org.apache.cassandra.cache.SerializingCacheProvider +# This is the row cache implementation availabile +# in previous releases of Cassandra. +# row_cache_class_name: org.apache.cassandra.cache.OHCProvider + +# Maximum size of the row cache in memory. +# Please note that OHC cache implementation requires some additional off-heap memory to manage +# the map structures and some in-flight memory during operations before/after cache entries can be +# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. +# Do not specify more memory that the system can afford in the worst usual situation and leave some +# headroom for OS block level cache. Do never allow your system to swap. +# +# Default value is 0, to disable row caching. +row_cache_size_in_mb: 0 + +# Duration in seconds after which Cassandra should save the row cache. +# Caches are saved to saved_caches_directory as specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 0 to disable saving the row cache. +row_cache_save_period: 0 + +# Number of keys from the row cache to save. +# Specify 0 (which is the default), meaning all keys are going to be saved +# row_cache_keys_to_save: 100 + +# Maximum size of the counter cache in memory. +# +# Counter cache helps to reduce counter locks' contention for hot counter cells. +# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before +# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration +# of the lock hold, helping with hot counter cell updates, but will not allow skipping +# the read entirely. Only the local (clock, count) tuple of a counter cell is kept +# in memory, not the whole counter, so it's relatively cheap. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. +# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. +counter_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the counter cache (keys only). Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Default is 7200 or 2 hours. +counter_cache_save_period: 7200 + +# Number of keys from the counter cache to save +# Disabled by default, meaning all keys are going to be saved +# counter_cache_keys_to_save: 100 + +# saved caches +# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. +# saved_caches_directory: /var/lib/cassandra/saved_caches + +# Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting +# the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup +# while still having the cache during runtime. +# cache_load_timeout_seconds: 30 + +# commitlog_sync may be either "periodic", "group", or "batch." +# +# When in batch mode, Cassandra won't ack writes until the commit log +# has been flushed to disk. Each incoming write will trigger the flush task. +# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had +# almost no value, and is being removed. +# +# commitlog_sync_batch_window_in_ms: 2 +# +# group mode is similar to batch mode, where Cassandra will not ack writes +# until the commit log has been flushed to disk. The difference is group +# mode will wait up to commitlog_sync_group_window_in_ms between flushes. +# +# commitlog_sync_group_window_in_ms: 1000 +# +# the default option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync: periodic +commitlog_sync_period_in_ms: 10000 + +# When in periodic commitlog mode, the number of milliseconds to block writes +# while waiting for a slow disk flush to complete. +# periodic_commitlog_sync_lag_block_in_ms: + +# The size of the individual commitlog file segments. A commitlog +# segment may be archived, deleted, or recycled once all the data +# in it (potentially from each columnfamily in the system) has been +# flushed to sstables. +# +# The default size is 32, which is almost always fine, but if you are +# archiving commitlog segments (see commitlog_archiving.properties), +# then you probably want a finer granularity of archiving; 8 or 16 MB +# is reasonable. +# Max mutation size is also configurable via max_mutation_size_in_kb setting in +# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. +# This should be positive and less than 2048. +# +# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must +# be set to at least twice the size of max_mutation_size_in_kb / 1024 +# +commitlog_segment_size_in_mb: 32 + +# Compression to apply to the commit log. If omitted, the commit log +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +# commitlog_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Compression to apply to SSTables as they flush for compressed tables. +# Note that tables without compression enabled do not respect this flag. +# +# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially +# block flushes for too long, the default is to flush with a known fast +# compressor in those cases. Options are: +# +# none : Flush without compressing blocks but while still doing checksums. +# fast : Flush with a fast compressor. If the table is already using a +# fast compressor that compressor is used. +# table: Always flush with the same compressor that the table uses. This +# was the pre 4.0 behavior. +# +# flush_compression: fast + +# any class that implements the SeedProvider interface and has a +# constructor that takes a Map of parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "cass1,cass2" + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. Same applies to +# "concurrent_counter_writes", since counter writes read the current +# values before incrementing and writing them back. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 + +# For materialized view writes, as there is a read involved, so this should +# be limited by the less of concurrent reads or concurrent writes. +concurrent_materialized_view_writes: 32 + +# Maximum memory to use for inter-node and client-server networking buffers. +# +# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# networking_cache_size_in_mb: 128 + +# Enable the sstable chunk cache. The chunk cache will store recently accessed +# sections of the sstable in-memory as uncompressed buffers. +# file_cache_enabled: false + +# Maximum memory to use for sstable chunk cache and buffer pooling. +# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache +# that holds uncompressed sstable chunks. +# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# file_cache_size_in_mb: 512 + +# Flag indicating whether to allocate on or off heap when the sstable buffer +# pool is exhausted, that is when it has exceeded the maximum memory +# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + +# buffer_pool_use_heap_if_exhausted: true + +# The strategy for optimizing disk read +# Possible values are: +# ssd (for solid state disks, the default) +# spinning (for spinning disks) +# disk_optimization_strategy: ssd + +# Total permitted memory to use for memtables. Cassandra will stop +# accepting writes when the limit is exceeded until a flush completes, +# and will trigger a flush based on memtable_cleanup_threshold +# If omitted, Cassandra will set both to 1/4 the size of the heap. +# memtable_heap_space_in_mb: 2048 +# memtable_offheap_space_in_mb: 2048 + +# memtable_cleanup_threshold is deprecated. The default calculation +# is the only reasonable choice. See the comments on memtable_flush_writers +# for more information. +# +# Ratio of occupied non-flushing memtable size to total permitted size +# that will trigger a flush of the largest memtable. Larger mct will +# mean larger flushes and hence less compaction, but also less concurrent +# flush activity which can make it difficult to keep your disks fed +# under heavy write load. +# +# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) +# memtable_cleanup_threshold: 0.11 + +# Specify the way Cassandra allocates and manages memtable memory. +# Options are: +# +# heap_buffers +# on heap nio buffers +# +# offheap_buffers +# off heap (direct) nio buffers +# +# offheap_objects +# off heap objects +memtable_allocation_type: heap_buffers + +# Limit memory usage for Merkle tree calculations during repairs. The default +# is 1/16th of the available heap. The main tradeoff is that smaller trees +# have less resolution, which can lead to over-streaming data. If you see heap +# pressure during repairs, consider lowering this, but you cannot go below +# one megabyte. If you see lots of over-streaming, consider raising +# this or using subrange repair. +# +# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. +# +# repair_session_space_in_mb: + +# Total space to use for commit logs on disk. +# +# If space gets above this value, Cassandra will flush every dirty CF +# in the oldest segment and remove it. So a small total commitlog space +# will tend to cause more flush activity on less-active columnfamilies. +# +# The default value is the smaller of 8192, and 1/4 of the total space +# of the commitlog volume. +# +# commitlog_total_space_in_mb: 8192 + +# This sets the number of memtable flush writer threads per disk +# as well as the total number of memtables that can be flushed concurrently. +# These are generally a combination of compute and IO bound. +# +# Memtable flushing is more CPU efficient than memtable ingest and a single thread +# can keep up with the ingest rate of a whole server on a single fast disk +# until it temporarily becomes IO bound under contention typically with compaction. +# At that point you need multiple flush threads. At some point in the future +# it may become CPU bound all the time. +# +# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation +# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing +# to free memory. +# +# memtable_flush_writers defaults to two for a single data directory. +# This means that two memtables can be flushed concurrently to the single data directory. +# If you have multiple data directories the default is one memtable flushing at a time +# but the flush will use a thread per data directory so you will get two or more writers. +# +# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. +# Adding more flush writers will result in smaller more frequent flushes that introduce more +# compaction overhead. +# +# There is a direct tradeoff between number of memtables that can be flushed concurrently +# and flush size and frequency. More is not better you just need enough flush writers +# to never stall waiting for flushing to free memory. +# +#memtable_flush_writers: 2 + +# Total space to use for change-data-capture logs on disk. +# +# If space gets above this value, Cassandra will throw WriteTimeoutException +# on Mutations including tables with CDC enabled. A CDCCompactor is responsible +# for parsing the raw CDC logs and deleting them when parsing is completed. +# +# The default value is the min of 4096 mb and 1/8th of the total space +# of the drive where cdc_raw_directory resides. +# cdc_total_space_in_mb: 4096 + +# When we hit our cdc_raw limit and the CDCCompactor is either running behind +# or experiencing backpressure, we check at the following interval to see if any +# new space for cdc-tracked tables has been made available. Default to 250ms +# cdc_free_space_check_interval_ms: 250 + +# A fixed memory pool size in MB for for SSTable index summaries. If left +# empty, this will default to 5% of the heap size. If the memory usage of +# all index summaries exceeds this limit, SSTables with low read rates will +# shrink their index summaries in order to meet this limit. However, this +# is a best-effort process. In extreme conditions Cassandra may need to use +# more than this amount of memory. +index_summary_capacity_in_mb: + +# How frequently index summaries should be resampled. This is done +# periodically to redistribute memory from the fixed-size pool to sstables +# proportional their recent read rates. Setting to -1 will disable this +# process, leaving existing index summaries at their current sampling level. +index_summary_resize_interval_in_minutes: 60 + +# Whether to, when doing sequential writing, fsync() at intervals in +# order to force the operating system to flush the dirty +# buffers. Enable this to avoid sudden dirty buffer flushing from +# impacting read latencies. Almost always a good idea on SSDs; not +# necessarily on platters. +trickle_fsync: false +trickle_fsync_interval_in_kb: 10240 + +# TCP port, for commands and data +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +storage_port: 7000 + +# SSL port, for legacy encrypted communication. This property is unused unless enabled in +# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated +# as a single port can be used for either/both secure and insecure connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +ssl_storage_port: 7001 + +# Address or interface to bind to and tell other Cassandra nodes to connect to. +# You _must_ change this if you want multiple nodes to be able to communicate! +# +# Set listen_address OR listen_interface, not both. +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing _if_ the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). If unresolvable +# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems. +# +# Setting listen_address to 0.0.0.0 is always wrong. +# +listen_address: 172.20.0.7 + +# Set listen_address OR listen_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# listen_interface: eth0 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# listen_interface_prefer_ipv6: false + +# Address to broadcast to other Cassandra nodes +# Leaving this blank will set it to the same value as listen_address +broadcast_address: 172.20.0.7 + +# When using multiple physical network interfaces, set this +# to true to listen on broadcast_address in addition to +# the listen_address, allowing nodes to communicate in both +# interfaces. +# Ignore this property if the network configuration automatically +# routes between the public and private networks such as EC2. +# listen_on_broadcast_address: false + +# Internode authentication backend, implementing IInternodeAuthenticator; +# used to allow/disallow connections from peer nodes. +# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + +# Whether to start the native transport server. +# The address on which the native transport is bound is defined by rpc_address. +start_native_transport: true +# port for the CQL native transport to listen for clients on +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +native_transport_port: 9042 +# Enabling native transport encryption in client_encryption_options allows you to either use +# encryption for the standard port or to use a dedicated, additional port along with the unencrypted +# standard native_transport_port. +# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption +# for native_transport_port. Setting native_transport_port_ssl to a different value +# from native_transport_port will use encryption for native_transport_port_ssl while +# keeping native_transport_port unencrypted. +# native_transport_port_ssl: 9142 +# The maximum threads for handling requests (note that idle threads are stopped +# after 30 seconds so there is not corresponding minimum setting). +# native_transport_max_threads: 128 +# +# The maximum size of allowed frame. Frame (requests) larger than this will +# be rejected as invalid. The default is 256MB. If you're changing this parameter, +# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. +# native_transport_max_frame_size_in_mb: 256 + +# The maximum number of concurrent client connections. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections: -1 + +# The maximum number of concurrent client connections per source ip. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections_per_ip: -1 + +# Controls whether Cassandra honors older, yet currently supported, protocol versions. +# The default is true, which means all supported protocols will be honored. +native_transport_allow_older_protocols: true + +# Controls when idle client connections are closed. Idle connections are ones that had neither reads +# nor writes for a time period. +# +# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which +# will reset idle timeout timer on the server side. To close idle client connections, corresponding +# values for heartbeat intervals have to be set on the client side. +# +# Idle connection timeouts are disabled by default. +# native_transport_idle_timeout_in_ms: 60000 + +# The address or interface to bind the native transport server to. +# +# Set rpc_address OR rpc_interface, not both. +# +# Leaving rpc_address blank has the same effect as on listen_address +# (i.e. it will be based on the configured hostname of the node). +# +# Note that unlike listen_address, you can specify 0.0.0.0, but you must also +# set broadcast_rpc_address to a value other than 0.0.0.0. +# +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +rpc_address: 0.0.0.0 + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +broadcast_rpc_address: 172.20.0.7 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# See also: +# /proc/sys/net/core/wmem_max +# /proc/sys/net/core/rmem_max +# /proc/sys/net/ipv4/tcp_wmem +# /proc/sys/net/ipv4/tcp_wmem +# and 'man tcp' +# internode_socket_send_buffer_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# internode_socket_receive_buffer_size_in_bytes: + +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# Whether or not a snapshot is taken of the data before keyspace truncation +# or dropping of column families. The STRONGLY advised default of true +# should be used to provide data safety. If you set this flag to false, you will +# lose data on truncation or drop. +auto_snapshot: true + +# The act of creating or clearing a snapshot involves creating or removing +# potentially tens of thousands of links, which can cause significant performance +# impact, especially on consumer grade SSDs. A non-zero value here can +# be used to throttle these links to avoid negative performance impact of +# taking and clearing snapshots +snapshot_links_per_second: 0 + +# Granularity of the collation index of rows within a partition. +# Increase if your rows are large, or if you have a very large +# number of rows per partition. The competing goals are these: +# +# - a smaller granularity means more index entries are generated +# and looking up rows withing the partition by collation column +# is faster +# - but, Cassandra will keep the collation index in memory for hot +# rows (as part of the key cache), so a larger granularity means +# you can cache more hot rows +column_index_size_in_kb: 64 + +# Per sstable indexed key cache entries (the collation index in memory +# mentioned above) exceeding this size will not be held on heap. +# This means that only partition information is held on heap and the +# index entries are read from disk. +# +# Note that this size refers to the size of the +# serialized index information and not the size of the partition. +column_index_cache_size_in_kb: 2 + +# Number of simultaneous compactions to allow, NOT including +# validation "compactions" for anti-entropy repair. Simultaneous +# compactions can help preserve read performance in a mixed read/write +# workload, by mitigating the tendency of small sstables to accumulate +# during a single long running compactions. The default is usually +# fine and if you experience problems with compaction running too +# slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# +# concurrent_compactors defaults to the smaller of (number of disks, +# number of cores), with a minimum of 2 and a maximum of 8. +# +# If your data directories are backed by SSD, you should increase this +# to the number of cores. +#concurrent_compactors: 1 + +# Number of simultaneous repair validations to allow. If not set or set to +# a value less than 1, it defaults to the value of concurrent_compactors. +# To set a value greeater than concurrent_compactors at startup, the system +# property cassandra.allow_unlimited_concurrent_validations must be set to +# true. To dynamically resize to a value > concurrent_compactors on a running +# node, first call the bypassConcurrentValidatorsLimit method on the +# org.apache.cassandra.db:type=StorageService mbean +# concurrent_validations: 0 + +# Number of simultaneous materialized view builder tasks to allow. +concurrent_materialized_view_builders: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. Note that this accounts for all types +# of compaction, including validation compaction (building Merkle trees +# for repairs). +compaction_throughput_mb_per_sec: 64 + +# When compacting, the replacement sstable(s) can be opened before they +# are completely written, and used in place of the prior sstables for +# any range that has been written. This helps to smoothly transfer reads +# between the sstables, reducing page cache churn and keeping hot rows hot +sstable_preemptive_open_interval_in_mb: 50 + +# When enabled, permits Cassandra to zero-copy stream entire eligible +# SSTables between nodes, including every component. +# This speeds up the network transfer significantly subject to +# throttling specified by stream_throughput_outbound_megabits_per_sec. +# Enabling this will reduce the GC pressure on sending and receiving node. +# When unset, the default is enabled. While this feature tries to keep the +# disks balanced, it cannot guarantee it. This feature will be automatically +# disabled if internode encryption is enabled. +# stream_entire_sstables: true + +# Throttles all outbound streaming file transfers on this node to the +# given total throughput in Mbps. This is necessary because Cassandra does +# mostly sequential IO when streaming data during bootstrap or repair, which +# can lead to saturating the network connection and degrading rpc performance. +# When unset, the default is 200 Mbps or 25 MB/s. +# stream_throughput_outbound_megabits_per_sec: 200 + +# Throttles all streaming file transfer between the datacenters, +# this setting allows users to throttle inter dc stream throughput in addition +# to throttling all network stream traffic as configured with +# stream_throughput_outbound_megabits_per_sec +# When unset, the default is 200 Mbps or 25 MB/s +# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + +# Server side timeouts for requests. The server will return a timeout exception +# to the client if it can't complete an operation within the corresponding +# timeout. Those settings are a protection against: +# 1) having client wait on an operation that might never terminate due to some +# failures. +# 2) operations that use too much CPU/read too much data (leading to memory build +# up) by putting a limit to how long an operation will execute. +# For this reason, you should avoid putting these settings too high. In other words, +# if you are timing out requests because of underlying resource constraints then +# increasing the timeout will just cause more problems. Of course putting them too +# low is equally ill-advised since clients could get timeouts even for successful +# operations just because the timeout setting is too tight. + +# How long the coordinator should wait for read operations to complete. +# Lowest acceptable value is 10 ms. +read_request_timeout_in_ms: 50000 +# How long the coordinator should wait for seq or index scans to complete. +# Lowest acceptable value is 10 ms. +range_request_timeout_in_ms: 10000 +# How long the coordinator should wait for writes to complete. +# Lowest acceptable value is 10 ms. +write_request_timeout_in_ms: 20000 +# How long the coordinator should wait for counter writes to complete. +# Lowest acceptable value is 10 ms. +counter_write_request_timeout_in_ms: 5000 +# How long a coordinator should continue to retry a CAS operation +# that contends with other proposals for the same row. +# Lowest acceptable value is 10 ms. +cas_contention_timeout_in_ms: 1000 +# How long the coordinator should wait for truncates to complete +# (This can be much longer, because unless auto_snapshot is disabled +# we need to flush first so we can snapshot before removing the data.) +# Lowest acceptable value is 10 ms. +truncate_request_timeout_in_ms: 60000 +# The default timeout for other, miscellaneous operations. +# Lowest acceptable value is 10 ms. +request_timeout_in_ms: 10000 + +# Defensive settings for protecting Cassandra from true network partitions. +# See (CASSANDRA-14358) for details. +# +# The amount of time to wait for internode tcp connections to establish. +# internode_tcp_connect_timeout_in_ms: 2000 +# +# The amount of time unacknowledged data is allowed on a connection before we throw out the connection +# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000 +# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0 +# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8. +# internode_tcp_user_timeout_in_ms: 30000 + +# The amount of time unacknowledged data is allowed on a streaming connection. +# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout. +# internode_streaming_tcp_user_timeout_in_ms: 300000 + +# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes +# and waiting to be processed on arrival from other nodes in the cluster. These limits are applied to the on-wire +# size of the message being sent or received. +# +# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed. +# Each node-pair has three links: urgent, small and large. So any given node may have a maximum of +# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes) +# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens +# nodes should need to communicate with significant bandwidth. +# +# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit, +# on all links to or from a single node in the cluster. +# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit, +# on all links to or from any node in the cluster. +# +# internode_application_send_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB +# internode_application_receive_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB + + +# How long before a node logs slow queries. Select queries that take longer than +# this timeout to execute, will generate an aggregated log message, so that slow queries +# can be identified. Set this value to zero to disable slow query logging. +slow_query_log_timeout_in_ms: 500 + +# Enable operation timeout information exchange between nodes to accurately +# measure request timeouts. If disabled, replicas will assume that requests +# were forwarded to them instantly by the coordinator, which means that +# under overload conditions we will waste that much extra time processing +# already-timed-out requests. +# +# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync, +# since this is a requirement for general correctness of last write wins. +#cross_node_timeout: true + +# Set keep-alive period for streaming +# This node will send a keep-alive message periodically with this period. +# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fail +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# Limit number of connections per host for streaming +# Increase this when you notice that joins are CPU-bound rather that network +# bound (for example a few nodes with big files). +# streaming_connections_per_host: 1 + + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS). From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. +endpoint_snitch: GossipingPropertyFileSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 1.0 + +# Configure server-to-server internode encryption +# +# JVM and netty defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable server-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set internode_encryption= and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +server_encryption_options: + # On outbound connections, determine which type of peers to securely connect to. + # The available options are : + # none : Do not encrypt outgoing connections + # dc : Encrypt connections to peers in other datacenters but not within datacenters + # rack : Encrypt connections to peers in other racks but not within racks + # all : Always use encrypted connections + internode_encryption: none + # When set to true, encrypted and unencrypted connections are allowed on the storage_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true if internode_encryption is none + # optional: true + # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used + # during upgrade to 4.0; otherwise, set to false. + enable_legacy_ssl_storage_port: false + # Set to a valid keystore if internode_encryption is dc, rack or all + keystore: conf/.keystore + keystore_password: cassandra + # Verify peer server certificates + require_client_auth: false + # Set to a valid trustore if require_client_auth is true + truststore: conf/.truststore + truststore_password: cassandra + # Verify that the host name in the certificate matches the connected host + require_endpoint_verification: false + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# Configure client-to-server encryption. +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable client-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +client_encryption_options: + # Enable client-to-server encryption + enabled: false + # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true when enabled is false, and false when enabled is true. + # optional: true + # Set keystore and keystore_password to valid keystores if enabled is true + keystore: conf/.keystore + keystore_password: cassandra + # Verify client certificates + require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. +windows_timer_interval: 1 + + +# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from +# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by +# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys +# can still (and should!) be in the keystore and will be used on decrypt operations +# (to handle the case of key rotation). +# +# It is strongly recommended to download and install Java Cryptography Extension (JCE) +# Unlimited Strength Jurisdiction Policy Files for your version of the JDK. +# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) +# +# Currently, only the following file types are supported for transparent data encryption, although +# more are coming in future cassandra releases: commitlog, hints +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + +##################### +# SAFETY THRESHOLDS # +##################### + +# When executing a scan, within or across a partition, we need to keep the +# tombstones seen in memory so we can return them to the coordinator, which +# will use them to make sure other replicas also know about the deleted rows. +# With workloads that generate a lot of tombstones, this can cause performance +# problems and even exaust the server heap. +# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) +# Adjust the thresholds here if you understand the dangers and want to +# scan more tombstones anyway. These thresholds may also be adjusted at runtime +# using the StorageService mbean. +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a +# mechanism called replica filtering protection to ensure that results from stale replicas do +# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This +# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly +# stale results returned by the replicas, the more rows materialized during the query. +replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. +# Caution should be taken on increasing the size of this threshold as it can lead to node instability. +batch_size_warn_threshold_in_kb: 5 + +# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. +batch_size_fail_threshold_in_kb: 50 + +# Log WARN on any batches not of type LOGGED than span across more partitions than this limit +unlogged_batch_across_partitions_warn_threshold: 10 + +# Log a warning when compacting partitions larger than this value +compaction_large_partition_warning_threshold_mb: 100 + +# GC Pauses greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# gc_warn_threshold_in_ms: 1000 + +# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption +# early. Any value size larger than this threshold will result into marking an SSTable +# as corrupted. This should be positive and less than 2048. +# max_value_size_in_mb: 256 + +# Track a metric per keyspace indicating whether replication achieved the ideal consistency +# level for writes without timing out. This is different from the consistency level requested by +# each write which may be lower in order to facilitate availability. +# ideal_consistency_level: EACH_QUORUM + +# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the +# oldest non-upgraded sstable will get upgraded to the latest version +# automatic_sstable_upgrade: false +# Limit the number of concurrent sstable upgrades +# max_concurrent_automatic_sstable_upgrades: 1 + +# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs +# on audit_logging for full details about the various configuration options. +audit_logging_options: + enabled: false + logger: + - class_name: BinAuditLogger + # audit_logs_dir: + # included_keyspaces: + # excluded_keyspaces: system, system_schema, system_virtual_schema + # included_categories: + # excluded_categories: + # included_users: + # excluded_users: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + + +# default options for full query logging - these can be overridden from command line when executing +# nodetool enablefullquerylog +#full_query_logging_options: + # log_dir: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + +# validate tombstones on reads and compaction +# can be either "disabled", "warn" or "exception" +# corrupted_tombstone_strategy: disabled + +# Diagnostic Events # +# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details +# on internal state and temporal relationships across events, accessible by clients via JMX. +diagnostic_events_enabled: false + +# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in +# particular you run an old kernel or have very fewer client connections, this option might be worth evaluating. +#native_transport_flush_in_batches_legacy: false + +# Enable tracking of repaired state of data during reads and comparison between replicas +# Mismatches between the repaired sets of replicas can be characterized as either confirmed +# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair +# sessions, unrepaired partition tombstones, or some other condition means that the disparity +# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation +# as they may be indicative of corruption or data loss. +# There are separate flags for range vs partition reads as single partition reads are only tracked +# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if +# enabled for range reads, all range reads will include repaired data tracking. As this adds +# some overhead, operators may wish to disable it whilst still enabling it for partition reads +repaired_data_tracking_for_range_reads_enabled: false +repaired_data_tracking_for_partition_reads_enabled: false +# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed +# mismatches will also be recorded. This is to avoid potential signal:noise issues are unconfirmed +# mismatches are less actionable than confirmed ones. +report_unconfirmed_repaired_data_mismatches: false + +# Having many tables and/or keyspaces negatively affects performance of many operations in the +# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds +# a client warning will be sent back to the user when creating a table or keyspace. +# table_count_warn_threshold: 150 +# keyspace_count_warn_threshold: 40 + +######################### +# EXPERIMENTAL FEATURES # +######################### + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# Enables SASI index creation on this node. +# SASI indexes are considered experimental and are not recommended for production use. +enable_sasi_indexes: true + +# Enables creation of transiently replicated keyspaces on this node. +# Transient replication is experimental and is not recommended for production use. +enable_transient_replication: false + +# Enables the used of 'ALTER ... DROP COMPACT STORAGE' statements on this node. +# 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use. +enable_drop_compact_storage: false diff --git a/etc/cass2/commitlog_archiving.properties b/etc/cass2/commitlog_archiving.properties new file mode 100644 index 0000000..393259c --- /dev/null +++ b/etc/cass2/commitlog_archiving.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# commitlog archiving configuration. Leave blank to disable. + +# Command to execute to archive a commitlog segment +# Parameters: %path => Fully qualified path of the segment to archive +# %name => Name of the commit log. +# Example: archive_command=/bin/ln %path /backup/%name +# +# Limitation: *_command= expects one command with arguments. STDOUT +# and STDIN or multiple commands cannot be executed. You might want +# to script multiple commands and add a pointer here. +archive_command= + +# Command to execute to make an archived commitlog live again. +# Parameters: %from is the full path to an archived commitlog segment (from restore_directories) +# %to is the live commitlog directory +# Example: restore_command=/bin/cp -f %from %to +restore_command= + +# Directory to scan the recovery files in. +restore_directories= + +# Restore mutations created up to and including this timestamp in GMT. +# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12) +# +# Recovery will continue through the segment when the first client-supplied +# timestamp greater than this time is encountered, but only mutations less than +# or equal to this timestamp will be applied. +restore_point_in_time= + +# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...) +precision=MICROSECONDS diff --git a/etc/cass2/jvm-clients.options b/etc/cass2/jvm-clients.options new file mode 100644 index 0000000..6181ed0 --- /dev/null +++ b/etc/cass2/jvm-clients.options @@ -0,0 +1,10 @@ +########################################################################### +# jvm-clients.options # +# # +# See jvm8-clients.options and jvm11-clients.options for Java version # +# specific options. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass2/jvm-server.options b/etc/cass2/jvm-server.options new file mode 100644 index 0000000..46967f4 --- /dev/null +++ b/etc/cass2/jvm-server.options @@ -0,0 +1,188 @@ +########################################################################### +# jvm-server.options # +# # +# - all flags defined here will be used by cassandra to startup the JVM # +# - one flag should be specified per line # +# - lines that do not start with '-' will be ignored # +# - only static flags are accepted (no variables or parameters) # +# - dynamic flags will be appended to these on cassandra-env # +# # +# See jvm8-server.options and jvm11-server.options for Java version # +# specific options. # +########################################################################### + +###################### +# STARTUP PARAMETERS # +###################### + +# Uncomment any of the following properties to enable specific startup parameters + +# In a multi-instance deployment, multiple Cassandra instances will independently assume that all +# CPU processors are available to it. This setting allows you to specify a smaller set of processors +# and perhaps have affinity. +#-Dcassandra.available_processors=number_of_processors + +# The directory location of the cassandra.yaml file. +#-Dcassandra.config=directory + +# Sets the initial partitioner token for a node the first time the node is started. +#-Dcassandra.initial_token=token + +# Set to false to start Cassandra on a node but not have the node join the cluster. +#-Dcassandra.join_ring=true|false + +# Set to false to clear all gossip state for the node on restart. Use when you have changed node +# information in cassandra.yaml (such as listen_address). +#-Dcassandra.load_ring_state=true|false + +# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. +#-Dcassandra.metricsReporterConfigFile=file + +# Set the port on which the CQL native transport listens for clients. (Default: 9042) +#-Dcassandra.native_transport_port=port + +# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) +#-Dcassandra.partitioner=partitioner + +# To replace a node that has died, restart a new node in its place specifying the address of the +# dead node. The new node must not have any data in its data directory, that is, it must be in the +# same state as before bootstrapping. +#-Dcassandra.replace_address=listen_address or broadcast_address of dead node + +# Allow restoring specific tables from an archived commit log. +#-Dcassandra.replayList=table + +# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits +# before joining the ring. +#-Dcassandra.ring_delay_ms=ms + +# Set the SSL port for encrypted communication. (Default: 7001) +#-Dcassandra.ssl_storage_port=port + +# Set the port for inter-node communication. (Default: 7000) +#-Dcassandra.storage_port=port + +# Set the default location for the trigger JARs. (Default: conf/triggers) +#-Dcassandra.triggers_dir=directory + +# For testing new compaction and compression strategies. It allows you to experiment with different +# strategies and benchmark write performance differences without affecting the production workload. +#-Dcassandra.write_survey=true + +# To disable configuration via JMX of auth caches (such as those for credentials, permissions and +# roles). This will mean those config options can only be set (persistently) in cassandra.yaml +# and will require a restart for new values to take effect. +#-Dcassandra.disable_auth_caches_remote_configuration=true + +# To disable dynamic calculation of the page size used when indexing an entire partition (during +# initial index build/rebuild). If set to true, the page size will be fixed to the default of +# 10000 rows per page. +#-Dcassandra.force_default_indexing_page_size=true + +# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds +#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds + +######################## +# GENERAL JVM SETTINGS # +######################## + +# enable assertions. highly suggested for correct application functionality. +-ea + +# disable assertions for net.openhft.** because it runs out of memory by design +# if enabled and run for more than just brief testing +-da:net.openhft... + +# enable thread priorities, primarily so we can give periodic tasks +# a lower priority to avoid interfering with client workload +-XX:+UseThreadPriorities + +# Enable heap-dump if there's an OOM +-XX:+HeapDumpOnOutOfMemoryError + +# Per-thread stack size. +-Xss256k + +# Make sure all memory is faulted and zeroed on startup. +# This helps prevent soft faults in containers and makes +# transparent hugepage allocation more effective. +-XX:+AlwaysPreTouch + +# Disable biased locking as it does not benefit Cassandra. +-XX:-UseBiasedLocking + +# Enable thread-local allocation blocks and allow the JVM to automatically +# resize them at runtime. +-XX:+UseTLAB +-XX:+ResizeTLAB +-XX:+UseNUMA + +# http://www.evanjones.ca/jvm-mmap-pause.html +-XX:+PerfDisableSharedMem + +# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See +# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: +# comment out this entry to enable IPv6 support). +-Djava.net.preferIPv4Stack=true + +### Debug options + +# uncomment to enable flight recorder +#-XX:+UnlockCommercialFeatures +#-XX:+FlightRecorder + +# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 +#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 + +# uncomment to have Cassandra JVM log internal method compilation (developers only) +#-XX:+UnlockDiagnosticVMOptions +#-XX:+LogCompilation + +################# +# HEAP SETTINGS # +################# + +# Heap size is automatically calculated by cassandra-env based on this +# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) +# That is: +# - calculate 1/2 ram and cap to 1024MB +# - calculate 1/4 ram and cap to 8192MB +# - pick the max +# +# For production use you may wish to adjust this for your environment. +# If that's the case, uncomment the -Xmx and Xms options below to override the +# automatic calculation of JVM heap memory. +# +# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to +# the same value to avoid stop-the-world GC pauses during resize, and +# so that we can lock the heap in memory on startup to prevent any +# of it from being swapped out. +#-Xms4G +#-Xmx4G + +# Young generation size is automatically calculated by cassandra-env +# based on this formula: min(100 * num_cores, 1/4 * heap size) +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# It is not recommended to set the young generation size if using the +# G1 GC, since that will override the target pause-time goal. +# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html +# +# The example below assumes a modern 8-core+ machine for decent +# times. If in doubt, and if you do not particularly want to tweak, go +# 100 MB per physical CPU core. +#-Xmn800M + +################################### +# EXPIRATION DATE OVERFLOW POLICY # +################################### + +# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date: +# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00. +# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. +# * CAP_NOWARN: same as previous, except that the client warning will not be emitted. +# +#-Dcassandra.expiration_date_overflow_policy=REJECT diff --git a/etc/cass2/jvm11-clients.options b/etc/cass2/jvm11-clients.options new file mode 100644 index 0000000..c88b7ab --- /dev/null +++ b/etc/cass2/jvm11-clients.options @@ -0,0 +1,29 @@ +########################################################################### +# jvm11-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 11 and newer. # +########################################################################### + +################### +# JPMS SETTINGS # +################### + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + +# The newline in the end of file is intentional diff --git a/etc/cass2/jvm11-server.options b/etc/cass2/jvm11-server.options new file mode 100644 index 0000000..7e78467 --- /dev/null +++ b/etc/cass2/jvm11-server.options @@ -0,0 +1,103 @@ +########################################################################### +# jvm11-server.options # +# # +# See jvm-server.options. This file is specific for Java 11 and newer. # +########################################################################### + +################# +# GC SETTINGS # +################# + + + +### CMS Settings +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + + + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + + +### JPMS + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + + +### GC logging options -- uncomment to enable + +# Java 11 (and newer) GC logging options: +# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax +# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M +#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 + +# Notes for Java 8 migration: +# +# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc" +# -XX:+PrintGCDateStamps maps to decorator 'time' +# +# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' +# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' +# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' +# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' +# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' + +### Netty Options + +# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable +# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has +# inferior performance and risks exceeding MaxDirectMemory +-Dio.netty.tryReflectionSetAccessible=true + +# The newline in the end of file is intentional diff --git a/etc/cass2/jvm8-clients.options b/etc/cass2/jvm8-clients.options new file mode 100644 index 0000000..7d1b2ef --- /dev/null +++ b/etc/cass2/jvm8-clients.options @@ -0,0 +1,9 @@ +########################################################################### +# jvm8-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 8 and newer. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass2/jvm8-server.options b/etc/cass2/jvm8-server.options new file mode 100644 index 0000000..6214669 --- /dev/null +++ b/etc/cass2/jvm8-server.options @@ -0,0 +1,76 @@ +########################################################################### +# jvm8-server.options # +# # +# See jvm-server.options. This file is specific for Java 8 and newer. # +########################################################################### + +######################## +# GENERAL JVM SETTINGS # +######################## + +# allows lowering thread priority without being root on linux - probably +# not necessary on Windows but doesn't harm anything. +# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html +-XX:ThreadPriorityPolicy=42 + +################# +# GC SETTINGS # +################# + +### CMS Settings +-XX:+UseParNewGC +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + +### GC logging options -- uncomment to enable + +-XX:+PrintGCDetails +-XX:+PrintGCDateStamps +-XX:+PrintHeapAtGC +-XX:+PrintTenuringDistribution +-XX:+PrintGCApplicationStoppedTime +-XX:+PrintPromotionFailure +#-XX:PrintFLSStatistics=1 +#-Xloggc:/var/log/cassandra/gc.log +-XX:+UseGCLogFileRotation +-XX:NumberOfGCLogFiles=10 +-XX:GCLogFileSize=10M + +# The newline in the end of file is intentional diff --git a/etc/cass2/logback.xml b/etc/cass2/logback.xml new file mode 100644 index 0000000..e98fea4 --- /dev/null +++ b/etc/cass2/logback.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + INFO + + ${cassandra.logdir}/system.log + + + ${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + ${cassandra.logdir}/debug.log + + + ${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + 1024 + 0 + true + + + + + + + + INFO + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/cass3/.gitignore b/etc/cass3/.gitignore new file mode 100644 index 0000000..e81e4e0 --- /dev/null +++ b/etc/cass3/.gitignore @@ -0,0 +1,11 @@ +# ignore everything +#* + +# except +# .gitignore +# !cassandra.yaml +# !jvm* +# !logback.xml +# !commitlog_archiving.properties +# !cassandra-rackdc.properties +# !cassandra-env.sh \ No newline at end of file diff --git a/etc/cass3/cassandra-env.sh b/etc/cass3/cassandra-env.sh new file mode 100644 index 0000000..2e3c8c9 --- /dev/null +++ b/etc/cass3/cassandra-env.sh @@ -0,0 +1,307 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +calculate_heap_sizes() +{ + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi +} + +# Sets the path where logback and GC logs are written. +if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" +fi + +#GC log path has to be defined here because it needs to access CASSANDRA_HOME +if [ $JAVA_VERSION -ge 11 ] ; then + # See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax + # The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M + echo "$JVM_OPTS" | grep -qe "-[X]log:gc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760" + fi +else + # Java 8 + echo "$JVM_OPTS" | grep -qe "-[X]loggc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + fi +fi + +# Check what parameters were defined on jvm-server.options file to avoid conflicts +echo $JVM_OPTS | grep -q Xmn +DEFINED_XMN=$? +echo $JVM_OPTS | grep -q Xmx +DEFINED_XMX=$? +echo $JVM_OPTS | grep -q Xms +DEFINED_XMS=$? +echo $JVM_OPTS | grep -q UseConcMarkSweepGC +USING_CMS=$? +echo $JVM_OPTS | grep -q +UseG1GC +USING_G1=$? + +# Override these to set the amount of memory to allocate to the JVM at +# start-up. For production use you may wish to adjust this for your +# environment. MAX_HEAP_SIZE is the total amount of memory dedicated +# to the Java heap. HEAP_NEWSIZE refers to the size of the young +# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set +# or not (if you set one, set the other). +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause +# times. If in doubt, and if you do not particularly want to tweak, go with +# 100 MB per physical CPU core. + +#MAX_HEAP_SIZE="4G" +#HEAP_NEWSIZE="800M" + +# Set this to control the amount of arenas per-thread in glibc +#export MALLOC_ARENA_MAX=4 + +# only calculate the size if it's not set manually +if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes +elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 +fi + +if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 +fi + +# We only set -Xms and -Xmx if they were not defined on jvm-server.options file +# If defined, both Xmx and Xms should be defined together. +if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" +elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file." + exit 1 +fi + +# We only set -Xmn flag if it was not defined in jvm-server.options file +# and if the CMS GC is being used +# If defined, both Xmn and Xmx should be defined together. +if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file." + exit 1 +elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" +fi + +# We fail to start if -Xmn is used with G1 GC is being used +# See comments for -Xmn in jvm-server.options +if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then + echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details." + exit 1 +fi + +if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" +fi + +# provides hints to the JIT compiler +JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + +# add the jamm javaagent +JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar" + +# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR +if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" +fi + +# stop the jvm on OutOfMemoryError as it can result in some data corruption +# uncomment the preferred option +# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 +# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words +# on white spaces without taking quotes into account +# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" +# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" +JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + +# print an heap histogram on OutOfMemoryError +# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + +# jmx: metrics and administration interface +# +# add this if you're having trouble connecting: +# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" +# +# see +# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole +# for more on configuring JMX through firewalls, etc. (Short version: +# get it working with no firewall first.) +# +# Cassandra ships with JMX accessible *only* from localhost. +# To enable remote JMX connections, uncomment lines below +# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity +# +if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes +fi + +# Specifies the default port over which Cassandra will be available for +# JMX connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +JMX_PORT="7199" + +if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" +else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + # if ssl is enabled the same port cannot be used for both jmx and rmi so either + # pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins) + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + + # turn on JMX authentication. See below for further options + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true" + + # jmx ssl options + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=" +fi + +# jmx authentication and authorization options. By default, auth is only +# activated for remote connections but they can also be enabled for local only JMX +## Basic file based authn & authz +JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" +#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" +## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. +## JAAS login modules can be used for authentication by uncommenting these two properties. +## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - +## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration +## file cassandra-jaas.config +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" +#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + +## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, +## uncomment this to use it. Requires one of the two authentication options to be enabled +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + +# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ +# directory. +# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx +# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines +# to control its listen address and port. +#MX4J_ADDRESS="127.0.0.1" +#MX4J_PORT="8081" + +# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 +# for SIGAR we have to set the java.library.path +# to the location of the native libraries. +JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + +if [ "x$MX4J_ADDRESS" != "x" ]; then + if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + else + JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS" + fi +fi +if [ "x$MX4J_PORT" != "x" ]; then + if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + else + JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT" + fi +fi + +JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" + diff --git a/etc/cass3/cassandra-rackdc.properties b/etc/cass3/cassandra-rackdc.properties new file mode 100644 index 0000000..84716f5 --- /dev/null +++ b/etc/cass3/cassandra-rackdc.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These properties are used with GossipingPropertyFileSnitch and will +# indicate the rack and dc for this node +dc= Mars +rack= West + +# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch +# to append a string to the EC2 region name. +#dc_suffix= + +# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does. +# prefer_local=true + +# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch. +# Options are: +# legacy : datacenter name is the part of the availability zone name preceding the last "-" +# when the zone ends in -1 and includes the number if not -1. Rack is the portion of +# the availability zone name following the last "-". +# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b; +# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER +# standard : Default value. datacenter name is the standard AWS region name, including the number. +# rack name is the region plus the availability zone letter. +# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b; +# ec2_naming_scheme=standard diff --git a/etc/cass3/cassandra.yaml b/etc/cass3/cassandra.yaml new file mode 100644 index 0000000..c2947d0 --- /dev/null +++ b/etc/cass3/cassandra.yaml @@ -0,0 +1,1419 @@ + +# Cassandra storage config YAML + +# NOTE: +# See https://cassandra.apache.org/doc/latest/configuration/ for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: SolarSystem + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for +# best practice information about num_tokens. +# +num_tokens: 128 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replica factor. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. + +# Replica factor is determined via the replication strategy used by the specified +# keyspace. +# allocate_tokens_for_keyspace: KEYSPACE + +# Replica factor is explicitly set, regardless of keyspace or datacenter. +# This is the replica factor within the datacenter, like NTS. +allocate_tokens_for_local_replication_factor: 3 + +# initial_token allows you to specify tokens manually. While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) +hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: AllowAllAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: AllowAllAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. +# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Network authorization backend, implementing INetworkAuthorizer; used to restrict user +# access to certain DCs +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer, +# CassandraNetworkAuthorizer}. +# +# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization. +# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +network_authorizer: AllowAllNetworkAuthorizer + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. The partitioner can NOT be +# changed without reloading all data. If you are adding nodes or upgrading, +# you should set this to the same partitioner that you are currently using. +# +# The default partitioner is the Murmur3Partitioner. Older partitioners +# such as the RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner have been included for backward compatibility only. +# For new clusters, you should NOT change this value. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. If multiple +# directories are specified, Cassandra will spread data evenly across +# them by partitioning the token ranges. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +# data_file_directories: +# - /var/lib/cassandra/data + +# Directory were Cassandra should store the data of the local system keyspaces. +# By default Cassandra will store the data of the local system keyspaces in the first of the data directories specified +# by data_file_directories. +# This approach ensures that if one of the other disks is lost Cassandra can continue to operate. For extra security +# this setting allows to store those data on a different directory that provides redundancy. +# local_system_data_file_directory: + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +# commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. If not set, the default directory is +# $CASSANDRA_HOME/data/cdc_raw. +# cdc_raw_directory: /var/lib/cassandra/cdc_raw + +# Policy for data disk failures: +# +# die +# shut down gossip and client transports and kill the JVM for any fs errors or +# single-sstable errors, so the node can be replaced. +# +# stop_paranoid +# shut down gossip and client transports even for single-sstable errors, +# kill the JVM for errors during startup. +# +# stop +# shut down gossip and client transports, leaving the node effectively dead, but +# can still be inspected via JMX, kill the JVM for errors during startup. +# +# best_effort +# stop using the failed disk and respond to requests based on +# remaining available sstables. This means you WILL see obsolete +# data at CL.ONE! +# +# ignore +# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra +disk_failure_policy: stop + +# Policy for commit disk failures: +# +# die +# shut down the node and kill the JVM, so the node can be replaced. +# +# stop +# shut down the node, leaving the node effectively dead, but +# can still be inspected via JMX. +# +# stop_commit +# shutdown the commit log, letting writes collect but +# continuing to service reads, as in pre-2.0.5 Cassandra +# +# ignore +# ignore fatal errors and let the batches fail +commit_failure_policy: stop + +# Maximum size of the native protocol prepared statement cache +# +# Valid values are either "auto" (omitting the value) or a value greater 0. +# +# Note that specifying a too large value will result in long running GCs and possbily +# out-of-memory errors. Keep the value at a small fraction of the heap. +# +# If you constantly see "prepared statements discarded in the last minute because +# cache limit reached" messages, the first step is to investigate the root cause +# of these messages and check whether prepared statements are used correctly - +# i.e. use bind markers for variable parts. +# +# Do only change the default value, if you really have more prepared statements than +# fit in the cache. In most cases it is not neccessary to change this value. +# Constantly re-preparing statements is a performance penalty. +# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +prepared_statements_cache_size_mb: + +# Maximum size of the key cache in memory. +# +# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the +# minimum, sometimes more. The key cache is fairly tiny for the amount of +# time it saves, so it's worthwhile to use it at large numbers. +# The row cache saves even more time, but must contain the entire row, +# so it is extremely space-intensive. It's best to only use the +# row cache if you have hot rows or static rows. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. +key_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the key cache. Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 14400 or 4 hours. +key_cache_save_period: 14400 + +# Number of keys from the key cache to save +# Disabled by default, meaning all keys are going to be saved +# key_cache_keys_to_save: 100 + +# Row cache implementation class name. Available implementations: +# +# org.apache.cassandra.cache.OHCProvider +# Fully off-heap row cache implementation (default). +# +# org.apache.cassandra.cache.SerializingCacheProvider +# This is the row cache implementation availabile +# in previous releases of Cassandra. +# row_cache_class_name: org.apache.cassandra.cache.OHCProvider + +# Maximum size of the row cache in memory. +# Please note that OHC cache implementation requires some additional off-heap memory to manage +# the map structures and some in-flight memory during operations before/after cache entries can be +# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. +# Do not specify more memory that the system can afford in the worst usual situation and leave some +# headroom for OS block level cache. Do never allow your system to swap. +# +# Default value is 0, to disable row caching. +row_cache_size_in_mb: 0 + +# Duration in seconds after which Cassandra should save the row cache. +# Caches are saved to saved_caches_directory as specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 0 to disable saving the row cache. +row_cache_save_period: 0 + +# Number of keys from the row cache to save. +# Specify 0 (which is the default), meaning all keys are going to be saved +# row_cache_keys_to_save: 100 + +# Maximum size of the counter cache in memory. +# +# Counter cache helps to reduce counter locks' contention for hot counter cells. +# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before +# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration +# of the lock hold, helping with hot counter cell updates, but will not allow skipping +# the read entirely. Only the local (clock, count) tuple of a counter cell is kept +# in memory, not the whole counter, so it's relatively cheap. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. +# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. +counter_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the counter cache (keys only). Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Default is 7200 or 2 hours. +counter_cache_save_period: 7200 + +# Number of keys from the counter cache to save +# Disabled by default, meaning all keys are going to be saved +# counter_cache_keys_to_save: 100 + +# saved caches +# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. +# saved_caches_directory: /var/lib/cassandra/saved_caches + +# Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting +# the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup +# while still having the cache during runtime. +# cache_load_timeout_seconds: 30 + +# commitlog_sync may be either "periodic", "group", or "batch." +# +# When in batch mode, Cassandra won't ack writes until the commit log +# has been flushed to disk. Each incoming write will trigger the flush task. +# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had +# almost no value, and is being removed. +# +# commitlog_sync_batch_window_in_ms: 2 +# +# group mode is similar to batch mode, where Cassandra will not ack writes +# until the commit log has been flushed to disk. The difference is group +# mode will wait up to commitlog_sync_group_window_in_ms between flushes. +# +# commitlog_sync_group_window_in_ms: 1000 +# +# the default option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync: periodic +commitlog_sync_period_in_ms: 10000 + +# When in periodic commitlog mode, the number of milliseconds to block writes +# while waiting for a slow disk flush to complete. +# periodic_commitlog_sync_lag_block_in_ms: + +# The size of the individual commitlog file segments. A commitlog +# segment may be archived, deleted, or recycled once all the data +# in it (potentially from each columnfamily in the system) has been +# flushed to sstables. +# +# The default size is 32, which is almost always fine, but if you are +# archiving commitlog segments (see commitlog_archiving.properties), +# then you probably want a finer granularity of archiving; 8 or 16 MB +# is reasonable. +# Max mutation size is also configurable via max_mutation_size_in_kb setting in +# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. +# This should be positive and less than 2048. +# +# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must +# be set to at least twice the size of max_mutation_size_in_kb / 1024 +# +commitlog_segment_size_in_mb: 32 + +# Compression to apply to the commit log. If omitted, the commit log +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +# commitlog_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Compression to apply to SSTables as they flush for compressed tables. +# Note that tables without compression enabled do not respect this flag. +# +# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially +# block flushes for too long, the default is to flush with a known fast +# compressor in those cases. Options are: +# +# none : Flush without compressing blocks but while still doing checksums. +# fast : Flush with a fast compressor. If the table is already using a +# fast compressor that compressor is used. +# table: Always flush with the same compressor that the table uses. This +# was the pre 4.0 behavior. +# +# flush_compression: fast + +# any class that implements the SeedProvider interface and has a +# constructor that takes a Map of parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "cass1,cass2" + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. Same applies to +# "concurrent_counter_writes", since counter writes read the current +# values before incrementing and writing them back. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 + +# For materialized view writes, as there is a read involved, so this should +# be limited by the less of concurrent reads or concurrent writes. +concurrent_materialized_view_writes: 32 + +# Maximum memory to use for inter-node and client-server networking buffers. +# +# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# networking_cache_size_in_mb: 128 + +# Enable the sstable chunk cache. The chunk cache will store recently accessed +# sections of the sstable in-memory as uncompressed buffers. +# file_cache_enabled: false + +# Maximum memory to use for sstable chunk cache and buffer pooling. +# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache +# that holds uncompressed sstable chunks. +# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# file_cache_size_in_mb: 512 + +# Flag indicating whether to allocate on or off heap when the sstable buffer +# pool is exhausted, that is when it has exceeded the maximum memory +# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + +# buffer_pool_use_heap_if_exhausted: true + +# The strategy for optimizing disk read +# Possible values are: +# ssd (for solid state disks, the default) +# spinning (for spinning disks) +# disk_optimization_strategy: ssd + +# Total permitted memory to use for memtables. Cassandra will stop +# accepting writes when the limit is exceeded until a flush completes, +# and will trigger a flush based on memtable_cleanup_threshold +# If omitted, Cassandra will set both to 1/4 the size of the heap. +# memtable_heap_space_in_mb: 2048 +# memtable_offheap_space_in_mb: 2048 + +# memtable_cleanup_threshold is deprecated. The default calculation +# is the only reasonable choice. See the comments on memtable_flush_writers +# for more information. +# +# Ratio of occupied non-flushing memtable size to total permitted size +# that will trigger a flush of the largest memtable. Larger mct will +# mean larger flushes and hence less compaction, but also less concurrent +# flush activity which can make it difficult to keep your disks fed +# under heavy write load. +# +# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) +# memtable_cleanup_threshold: 0.11 + +# Specify the way Cassandra allocates and manages memtable memory. +# Options are: +# +# heap_buffers +# on heap nio buffers +# +# offheap_buffers +# off heap (direct) nio buffers +# +# offheap_objects +# off heap objects +memtable_allocation_type: heap_buffers + +# Limit memory usage for Merkle tree calculations during repairs. The default +# is 1/16th of the available heap. The main tradeoff is that smaller trees +# have less resolution, which can lead to over-streaming data. If you see heap +# pressure during repairs, consider lowering this, but you cannot go below +# one megabyte. If you see lots of over-streaming, consider raising +# this or using subrange repair. +# +# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. +# +# repair_session_space_in_mb: + +# Total space to use for commit logs on disk. +# +# If space gets above this value, Cassandra will flush every dirty CF +# in the oldest segment and remove it. So a small total commitlog space +# will tend to cause more flush activity on less-active columnfamilies. +# +# The default value is the smaller of 8192, and 1/4 of the total space +# of the commitlog volume. +# +# commitlog_total_space_in_mb: 8192 + +# This sets the number of memtable flush writer threads per disk +# as well as the total number of memtables that can be flushed concurrently. +# These are generally a combination of compute and IO bound. +# +# Memtable flushing is more CPU efficient than memtable ingest and a single thread +# can keep up with the ingest rate of a whole server on a single fast disk +# until it temporarily becomes IO bound under contention typically with compaction. +# At that point you need multiple flush threads. At some point in the future +# it may become CPU bound all the time. +# +# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation +# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing +# to free memory. +# +# memtable_flush_writers defaults to two for a single data directory. +# This means that two memtables can be flushed concurrently to the single data directory. +# If you have multiple data directories the default is one memtable flushing at a time +# but the flush will use a thread per data directory so you will get two or more writers. +# +# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. +# Adding more flush writers will result in smaller more frequent flushes that introduce more +# compaction overhead. +# +# There is a direct tradeoff between number of memtables that can be flushed concurrently +# and flush size and frequency. More is not better you just need enough flush writers +# to never stall waiting for flushing to free memory. +# +#memtable_flush_writers: 2 + +# Total space to use for change-data-capture logs on disk. +# +# If space gets above this value, Cassandra will throw WriteTimeoutException +# on Mutations including tables with CDC enabled. A CDCCompactor is responsible +# for parsing the raw CDC logs and deleting them when parsing is completed. +# +# The default value is the min of 4096 mb and 1/8th of the total space +# of the drive where cdc_raw_directory resides. +# cdc_total_space_in_mb: 4096 + +# When we hit our cdc_raw limit and the CDCCompactor is either running behind +# or experiencing backpressure, we check at the following interval to see if any +# new space for cdc-tracked tables has been made available. Default to 250ms +# cdc_free_space_check_interval_ms: 250 + +# A fixed memory pool size in MB for for SSTable index summaries. If left +# empty, this will default to 5% of the heap size. If the memory usage of +# all index summaries exceeds this limit, SSTables with low read rates will +# shrink their index summaries in order to meet this limit. However, this +# is a best-effort process. In extreme conditions Cassandra may need to use +# more than this amount of memory. +index_summary_capacity_in_mb: + +# How frequently index summaries should be resampled. This is done +# periodically to redistribute memory from the fixed-size pool to sstables +# proportional their recent read rates. Setting to -1 will disable this +# process, leaving existing index summaries at their current sampling level. +index_summary_resize_interval_in_minutes: 60 + +# Whether to, when doing sequential writing, fsync() at intervals in +# order to force the operating system to flush the dirty +# buffers. Enable this to avoid sudden dirty buffer flushing from +# impacting read latencies. Almost always a good idea on SSDs; not +# necessarily on platters. +trickle_fsync: false +trickle_fsync_interval_in_kb: 10240 + +# TCP port, for commands and data +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +storage_port: 7000 + +# SSL port, for legacy encrypted communication. This property is unused unless enabled in +# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated +# as a single port can be used for either/both secure and insecure connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +ssl_storage_port: 7001 + +# Address or interface to bind to and tell other Cassandra nodes to connect to. +# You _must_ change this if you want multiple nodes to be able to communicate! +# +# Set listen_address OR listen_interface, not both. +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing _if_ the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). If unresolvable +# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems. +# +# Setting listen_address to 0.0.0.0 is always wrong. +# +listen_address: 172.20.0.8 + +# Set listen_address OR listen_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# listen_interface: eth0 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# listen_interface_prefer_ipv6: false + +# Address to broadcast to other Cassandra nodes +# Leaving this blank will set it to the same value as listen_address +broadcast_address: 172.20.0.8 + +# When using multiple physical network interfaces, set this +# to true to listen on broadcast_address in addition to +# the listen_address, allowing nodes to communicate in both +# interfaces. +# Ignore this property if the network configuration automatically +# routes between the public and private networks such as EC2. +# listen_on_broadcast_address: false + +# Internode authentication backend, implementing IInternodeAuthenticator; +# used to allow/disallow connections from peer nodes. +# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + +# Whether to start the native transport server. +# The address on which the native transport is bound is defined by rpc_address. +start_native_transport: true +# port for the CQL native transport to listen for clients on +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +native_transport_port: 9042 +# Enabling native transport encryption in client_encryption_options allows you to either use +# encryption for the standard port or to use a dedicated, additional port along with the unencrypted +# standard native_transport_port. +# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption +# for native_transport_port. Setting native_transport_port_ssl to a different value +# from native_transport_port will use encryption for native_transport_port_ssl while +# keeping native_transport_port unencrypted. +# native_transport_port_ssl: 9142 +# The maximum threads for handling requests (note that idle threads are stopped +# after 30 seconds so there is not corresponding minimum setting). +# native_transport_max_threads: 128 +# +# The maximum size of allowed frame. Frame (requests) larger than this will +# be rejected as invalid. The default is 256MB. If you're changing this parameter, +# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. +# native_transport_max_frame_size_in_mb: 256 + +# The maximum number of concurrent client connections. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections: -1 + +# The maximum number of concurrent client connections per source ip. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections_per_ip: -1 + +# Controls whether Cassandra honors older, yet currently supported, protocol versions. +# The default is true, which means all supported protocols will be honored. +native_transport_allow_older_protocols: true + +# Controls when idle client connections are closed. Idle connections are ones that had neither reads +# nor writes for a time period. +# +# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which +# will reset idle timeout timer on the server side. To close idle client connections, corresponding +# values for heartbeat intervals have to be set on the client side. +# +# Idle connection timeouts are disabled by default. +# native_transport_idle_timeout_in_ms: 60000 + +# The address or interface to bind the native transport server to. +# +# Set rpc_address OR rpc_interface, not both. +# +# Leaving rpc_address blank has the same effect as on listen_address +# (i.e. it will be based on the configured hostname of the node). +# +# Note that unlike listen_address, you can specify 0.0.0.0, but you must also +# set broadcast_rpc_address to a value other than 0.0.0.0. +# +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +rpc_address: 0.0.0.0 + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +broadcast_rpc_address: 172.20.0.8 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# See also: +# /proc/sys/net/core/wmem_max +# /proc/sys/net/core/rmem_max +# /proc/sys/net/ipv4/tcp_wmem +# /proc/sys/net/ipv4/tcp_wmem +# and 'man tcp' +# internode_socket_send_buffer_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# internode_socket_receive_buffer_size_in_bytes: + +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# Whether or not a snapshot is taken of the data before keyspace truncation +# or dropping of column families. The STRONGLY advised default of true +# should be used to provide data safety. If you set this flag to false, you will +# lose data on truncation or drop. +auto_snapshot: true + +# The act of creating or clearing a snapshot involves creating or removing +# potentially tens of thousands of links, which can cause significant performance +# impact, especially on consumer grade SSDs. A non-zero value here can +# be used to throttle these links to avoid negative performance impact of +# taking and clearing snapshots +snapshot_links_per_second: 0 + +# Granularity of the collation index of rows within a partition. +# Increase if your rows are large, or if you have a very large +# number of rows per partition. The competing goals are these: +# +# - a smaller granularity means more index entries are generated +# and looking up rows withing the partition by collation column +# is faster +# - but, Cassandra will keep the collation index in memory for hot +# rows (as part of the key cache), so a larger granularity means +# you can cache more hot rows +column_index_size_in_kb: 64 + +# Per sstable indexed key cache entries (the collation index in memory +# mentioned above) exceeding this size will not be held on heap. +# This means that only partition information is held on heap and the +# index entries are read from disk. +# +# Note that this size refers to the size of the +# serialized index information and not the size of the partition. +column_index_cache_size_in_kb: 2 + +# Number of simultaneous compactions to allow, NOT including +# validation "compactions" for anti-entropy repair. Simultaneous +# compactions can help preserve read performance in a mixed read/write +# workload, by mitigating the tendency of small sstables to accumulate +# during a single long running compactions. The default is usually +# fine and if you experience problems with compaction running too +# slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# +# concurrent_compactors defaults to the smaller of (number of disks, +# number of cores), with a minimum of 2 and a maximum of 8. +# +# If your data directories are backed by SSD, you should increase this +# to the number of cores. +#concurrent_compactors: 1 + +# Number of simultaneous repair validations to allow. If not set or set to +# a value less than 1, it defaults to the value of concurrent_compactors. +# To set a value greeater than concurrent_compactors at startup, the system +# property cassandra.allow_unlimited_concurrent_validations must be set to +# true. To dynamically resize to a value > concurrent_compactors on a running +# node, first call the bypassConcurrentValidatorsLimit method on the +# org.apache.cassandra.db:type=StorageService mbean +# concurrent_validations: 0 + +# Number of simultaneous materialized view builder tasks to allow. +concurrent_materialized_view_builders: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. Note that this accounts for all types +# of compaction, including validation compaction (building Merkle trees +# for repairs). +compaction_throughput_mb_per_sec: 64 + +# When compacting, the replacement sstable(s) can be opened before they +# are completely written, and used in place of the prior sstables for +# any range that has been written. This helps to smoothly transfer reads +# between the sstables, reducing page cache churn and keeping hot rows hot +sstable_preemptive_open_interval_in_mb: 50 + +# When enabled, permits Cassandra to zero-copy stream entire eligible +# SSTables between nodes, including every component. +# This speeds up the network transfer significantly subject to +# throttling specified by stream_throughput_outbound_megabits_per_sec. +# Enabling this will reduce the GC pressure on sending and receiving node. +# When unset, the default is enabled. While this feature tries to keep the +# disks balanced, it cannot guarantee it. This feature will be automatically +# disabled if internode encryption is enabled. +# stream_entire_sstables: true + +# Throttles all outbound streaming file transfers on this node to the +# given total throughput in Mbps. This is necessary because Cassandra does +# mostly sequential IO when streaming data during bootstrap or repair, which +# can lead to saturating the network connection and degrading rpc performance. +# When unset, the default is 200 Mbps or 25 MB/s. +# stream_throughput_outbound_megabits_per_sec: 200 + +# Throttles all streaming file transfer between the datacenters, +# this setting allows users to throttle inter dc stream throughput in addition +# to throttling all network stream traffic as configured with +# stream_throughput_outbound_megabits_per_sec +# When unset, the default is 200 Mbps or 25 MB/s +# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + +# Server side timeouts for requests. The server will return a timeout exception +# to the client if it can't complete an operation within the corresponding +# timeout. Those settings are a protection against: +# 1) having client wait on an operation that might never terminate due to some +# failures. +# 2) operations that use too much CPU/read too much data (leading to memory build +# up) by putting a limit to how long an operation will execute. +# For this reason, you should avoid putting these settings too high. In other words, +# if you are timing out requests because of underlying resource constraints then +# increasing the timeout will just cause more problems. Of course putting them too +# low is equally ill-advised since clients could get timeouts even for successful +# operations just because the timeout setting is too tight. + +# How long the coordinator should wait for read operations to complete. +# Lowest acceptable value is 10 ms. +read_request_timeout_in_ms: 50000 +# How long the coordinator should wait for seq or index scans to complete. +# Lowest acceptable value is 10 ms. +range_request_timeout_in_ms: 10000 +# How long the coordinator should wait for writes to complete. +# Lowest acceptable value is 10 ms. +write_request_timeout_in_ms: 20000 +# How long the coordinator should wait for counter writes to complete. +# Lowest acceptable value is 10 ms. +counter_write_request_timeout_in_ms: 5000 +# How long a coordinator should continue to retry a CAS operation +# that contends with other proposals for the same row. +# Lowest acceptable value is 10 ms. +cas_contention_timeout_in_ms: 1000 +# How long the coordinator should wait for truncates to complete +# (This can be much longer, because unless auto_snapshot is disabled +# we need to flush first so we can snapshot before removing the data.) +# Lowest acceptable value is 10 ms. +truncate_request_timeout_in_ms: 60000 +# The default timeout for other, miscellaneous operations. +# Lowest acceptable value is 10 ms. +request_timeout_in_ms: 10000 + +# Defensive settings for protecting Cassandra from true network partitions. +# See (CASSANDRA-14358) for details. +# +# The amount of time to wait for internode tcp connections to establish. +# internode_tcp_connect_timeout_in_ms: 2000 +# +# The amount of time unacknowledged data is allowed on a connection before we throw out the connection +# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000 +# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0 +# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8. +# internode_tcp_user_timeout_in_ms: 30000 + +# The amount of time unacknowledged data is allowed on a streaming connection. +# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout. +# internode_streaming_tcp_user_timeout_in_ms: 300000 + +# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes +# and waiting to be processed on arrival from other nodes in the cluster. These limits are applied to the on-wire +# size of the message being sent or received. +# +# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed. +# Each node-pair has three links: urgent, small and large. So any given node may have a maximum of +# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes) +# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens +# nodes should need to communicate with significant bandwidth. +# +# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit, +# on all links to or from a single node in the cluster. +# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit, +# on all links to or from any node in the cluster. +# +# internode_application_send_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB +# internode_application_receive_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB + + +# How long before a node logs slow queries. Select queries that take longer than +# this timeout to execute, will generate an aggregated log message, so that slow queries +# can be identified. Set this value to zero to disable slow query logging. +slow_query_log_timeout_in_ms: 500 + +# Enable operation timeout information exchange between nodes to accurately +# measure request timeouts. If disabled, replicas will assume that requests +# were forwarded to them instantly by the coordinator, which means that +# under overload conditions we will waste that much extra time processing +# already-timed-out requests. +# +# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync, +# since this is a requirement for general correctness of last write wins. +#cross_node_timeout: true + +# Set keep-alive period for streaming +# This node will send a keep-alive message periodically with this period. +# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fail +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# Limit number of connections per host for streaming +# Increase this when you notice that joins are CPU-bound rather that network +# bound (for example a few nodes with big files). +# streaming_connections_per_host: 1 + + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS). From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. +endpoint_snitch: GossipingPropertyFileSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 1.0 + +# Configure server-to-server internode encryption +# +# JVM and netty defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable server-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set internode_encryption= and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +server_encryption_options: + # On outbound connections, determine which type of peers to securely connect to. + # The available options are : + # none : Do not encrypt outgoing connections + # dc : Encrypt connections to peers in other datacenters but not within datacenters + # rack : Encrypt connections to peers in other racks but not within racks + # all : Always use encrypted connections + internode_encryption: none + # When set to true, encrypted and unencrypted connections are allowed on the storage_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true if internode_encryption is none + # optional: true + # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used + # during upgrade to 4.0; otherwise, set to false. + enable_legacy_ssl_storage_port: false + # Set to a valid keystore if internode_encryption is dc, rack or all + keystore: conf/.keystore + keystore_password: cassandra + # Verify peer server certificates + require_client_auth: false + # Set to a valid trustore if require_client_auth is true + truststore: conf/.truststore + truststore_password: cassandra + # Verify that the host name in the certificate matches the connected host + require_endpoint_verification: false + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# Configure client-to-server encryption. +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable client-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +client_encryption_options: + # Enable client-to-server encryption + enabled: false + # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true when enabled is false, and false when enabled is true. + # optional: true + # Set keystore and keystore_password to valid keystores if enabled is true + keystore: conf/.keystore + keystore_password: cassandra + # Verify client certificates + require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. +windows_timer_interval: 1 + + +# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from +# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by +# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys +# can still (and should!) be in the keystore and will be used on decrypt operations +# (to handle the case of key rotation). +# +# It is strongly recommended to download and install Java Cryptography Extension (JCE) +# Unlimited Strength Jurisdiction Policy Files for your version of the JDK. +# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) +# +# Currently, only the following file types are supported for transparent data encryption, although +# more are coming in future cassandra releases: commitlog, hints +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + +##################### +# SAFETY THRESHOLDS # +##################### + +# When executing a scan, within or across a partition, we need to keep the +# tombstones seen in memory so we can return them to the coordinator, which +# will use them to make sure other replicas also know about the deleted rows. +# With workloads that generate a lot of tombstones, this can cause performance +# problems and even exaust the server heap. +# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) +# Adjust the thresholds here if you understand the dangers and want to +# scan more tombstones anyway. These thresholds may also be adjusted at runtime +# using the StorageService mbean. +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a +# mechanism called replica filtering protection to ensure that results from stale replicas do +# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This +# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly +# stale results returned by the replicas, the more rows materialized during the query. +replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. +# Caution should be taken on increasing the size of this threshold as it can lead to node instability. +batch_size_warn_threshold_in_kb: 5 + +# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. +batch_size_fail_threshold_in_kb: 50 + +# Log WARN on any batches not of type LOGGED than span across more partitions than this limit +unlogged_batch_across_partitions_warn_threshold: 10 + +# Log a warning when compacting partitions larger than this value +compaction_large_partition_warning_threshold_mb: 100 + +# GC Pauses greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# gc_warn_threshold_in_ms: 1000 + +# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption +# early. Any value size larger than this threshold will result into marking an SSTable +# as corrupted. This should be positive and less than 2048. +# max_value_size_in_mb: 256 + +# Track a metric per keyspace indicating whether replication achieved the ideal consistency +# level for writes without timing out. This is different from the consistency level requested by +# each write which may be lower in order to facilitate availability. +# ideal_consistency_level: EACH_QUORUM + +# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the +# oldest non-upgraded sstable will get upgraded to the latest version +# automatic_sstable_upgrade: false +# Limit the number of concurrent sstable upgrades +# max_concurrent_automatic_sstable_upgrades: 1 + +# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs +# on audit_logging for full details about the various configuration options. +audit_logging_options: + enabled: false + logger: + - class_name: BinAuditLogger + # audit_logs_dir: + # included_keyspaces: + # excluded_keyspaces: system, system_schema, system_virtual_schema + # included_categories: + # excluded_categories: + # included_users: + # excluded_users: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + + +# default options for full query logging - these can be overridden from command line when executing +# nodetool enablefullquerylog +#full_query_logging_options: + # log_dir: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + +# validate tombstones on reads and compaction +# can be either "disabled", "warn" or "exception" +# corrupted_tombstone_strategy: disabled + +# Diagnostic Events # +# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details +# on internal state and temporal relationships across events, accessible by clients via JMX. +diagnostic_events_enabled: false + +# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in +# particular you run an old kernel or have very fewer client connections, this option might be worth evaluating. +#native_transport_flush_in_batches_legacy: false + +# Enable tracking of repaired state of data during reads and comparison between replicas +# Mismatches between the repaired sets of replicas can be characterized as either confirmed +# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair +# sessions, unrepaired partition tombstones, or some other condition means that the disparity +# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation +# as they may be indicative of corruption or data loss. +# There are separate flags for range vs partition reads as single partition reads are only tracked +# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if +# enabled for range reads, all range reads will include repaired data tracking. As this adds +# some overhead, operators may wish to disable it whilst still enabling it for partition reads +repaired_data_tracking_for_range_reads_enabled: false +repaired_data_tracking_for_partition_reads_enabled: false +# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed +# mismatches will also be recorded. This is to avoid potential signal:noise issues are unconfirmed +# mismatches are less actionable than confirmed ones. +report_unconfirmed_repaired_data_mismatches: false + +# Having many tables and/or keyspaces negatively affects performance of many operations in the +# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds +# a client warning will be sent back to the user when creating a table or keyspace. +# table_count_warn_threshold: 150 +# keyspace_count_warn_threshold: 40 + +######################### +# EXPERIMENTAL FEATURES # +######################### + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# Enables SASI index creation on this node. +# SASI indexes are considered experimental and are not recommended for production use. +enable_sasi_indexes: true + +# Enables creation of transiently replicated keyspaces on this node. +# Transient replication is experimental and is not recommended for production use. +enable_transient_replication: false + +# Enables the used of 'ALTER ... DROP COMPACT STORAGE' statements on this node. +# 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use. +enable_drop_compact_storage: false diff --git a/etc/cass3/commitlog_archiving.properties b/etc/cass3/commitlog_archiving.properties new file mode 100644 index 0000000..393259c --- /dev/null +++ b/etc/cass3/commitlog_archiving.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# commitlog archiving configuration. Leave blank to disable. + +# Command to execute to archive a commitlog segment +# Parameters: %path => Fully qualified path of the segment to archive +# %name => Name of the commit log. +# Example: archive_command=/bin/ln %path /backup/%name +# +# Limitation: *_command= expects one command with arguments. STDOUT +# and STDIN or multiple commands cannot be executed. You might want +# to script multiple commands and add a pointer here. +archive_command= + +# Command to execute to make an archived commitlog live again. +# Parameters: %from is the full path to an archived commitlog segment (from restore_directories) +# %to is the live commitlog directory +# Example: restore_command=/bin/cp -f %from %to +restore_command= + +# Directory to scan the recovery files in. +restore_directories= + +# Restore mutations created up to and including this timestamp in GMT. +# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12) +# +# Recovery will continue through the segment when the first client-supplied +# timestamp greater than this time is encountered, but only mutations less than +# or equal to this timestamp will be applied. +restore_point_in_time= + +# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...) +precision=MICROSECONDS diff --git a/etc/cass3/jvm-clients.options b/etc/cass3/jvm-clients.options new file mode 100644 index 0000000..6181ed0 --- /dev/null +++ b/etc/cass3/jvm-clients.options @@ -0,0 +1,10 @@ +########################################################################### +# jvm-clients.options # +# # +# See jvm8-clients.options and jvm11-clients.options for Java version # +# specific options. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass3/jvm-server.options b/etc/cass3/jvm-server.options new file mode 100644 index 0000000..46967f4 --- /dev/null +++ b/etc/cass3/jvm-server.options @@ -0,0 +1,188 @@ +########################################################################### +# jvm-server.options # +# # +# - all flags defined here will be used by cassandra to startup the JVM # +# - one flag should be specified per line # +# - lines that do not start with '-' will be ignored # +# - only static flags are accepted (no variables or parameters) # +# - dynamic flags will be appended to these on cassandra-env # +# # +# See jvm8-server.options and jvm11-server.options for Java version # +# specific options. # +########################################################################### + +###################### +# STARTUP PARAMETERS # +###################### + +# Uncomment any of the following properties to enable specific startup parameters + +# In a multi-instance deployment, multiple Cassandra instances will independently assume that all +# CPU processors are available to it. This setting allows you to specify a smaller set of processors +# and perhaps have affinity. +#-Dcassandra.available_processors=number_of_processors + +# The directory location of the cassandra.yaml file. +#-Dcassandra.config=directory + +# Sets the initial partitioner token for a node the first time the node is started. +#-Dcassandra.initial_token=token + +# Set to false to start Cassandra on a node but not have the node join the cluster. +#-Dcassandra.join_ring=true|false + +# Set to false to clear all gossip state for the node on restart. Use when you have changed node +# information in cassandra.yaml (such as listen_address). +#-Dcassandra.load_ring_state=true|false + +# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. +#-Dcassandra.metricsReporterConfigFile=file + +# Set the port on which the CQL native transport listens for clients. (Default: 9042) +#-Dcassandra.native_transport_port=port + +# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) +#-Dcassandra.partitioner=partitioner + +# To replace a node that has died, restart a new node in its place specifying the address of the +# dead node. The new node must not have any data in its data directory, that is, it must be in the +# same state as before bootstrapping. +#-Dcassandra.replace_address=listen_address or broadcast_address of dead node + +# Allow restoring specific tables from an archived commit log. +#-Dcassandra.replayList=table + +# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits +# before joining the ring. +#-Dcassandra.ring_delay_ms=ms + +# Set the SSL port for encrypted communication. (Default: 7001) +#-Dcassandra.ssl_storage_port=port + +# Set the port for inter-node communication. (Default: 7000) +#-Dcassandra.storage_port=port + +# Set the default location for the trigger JARs. (Default: conf/triggers) +#-Dcassandra.triggers_dir=directory + +# For testing new compaction and compression strategies. It allows you to experiment with different +# strategies and benchmark write performance differences without affecting the production workload. +#-Dcassandra.write_survey=true + +# To disable configuration via JMX of auth caches (such as those for credentials, permissions and +# roles). This will mean those config options can only be set (persistently) in cassandra.yaml +# and will require a restart for new values to take effect. +#-Dcassandra.disable_auth_caches_remote_configuration=true + +# To disable dynamic calculation of the page size used when indexing an entire partition (during +# initial index build/rebuild). If set to true, the page size will be fixed to the default of +# 10000 rows per page. +#-Dcassandra.force_default_indexing_page_size=true + +# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds +#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds + +######################## +# GENERAL JVM SETTINGS # +######################## + +# enable assertions. highly suggested for correct application functionality. +-ea + +# disable assertions for net.openhft.** because it runs out of memory by design +# if enabled and run for more than just brief testing +-da:net.openhft... + +# enable thread priorities, primarily so we can give periodic tasks +# a lower priority to avoid interfering with client workload +-XX:+UseThreadPriorities + +# Enable heap-dump if there's an OOM +-XX:+HeapDumpOnOutOfMemoryError + +# Per-thread stack size. +-Xss256k + +# Make sure all memory is faulted and zeroed on startup. +# This helps prevent soft faults in containers and makes +# transparent hugepage allocation more effective. +-XX:+AlwaysPreTouch + +# Disable biased locking as it does not benefit Cassandra. +-XX:-UseBiasedLocking + +# Enable thread-local allocation blocks and allow the JVM to automatically +# resize them at runtime. +-XX:+UseTLAB +-XX:+ResizeTLAB +-XX:+UseNUMA + +# http://www.evanjones.ca/jvm-mmap-pause.html +-XX:+PerfDisableSharedMem + +# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See +# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: +# comment out this entry to enable IPv6 support). +-Djava.net.preferIPv4Stack=true + +### Debug options + +# uncomment to enable flight recorder +#-XX:+UnlockCommercialFeatures +#-XX:+FlightRecorder + +# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 +#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 + +# uncomment to have Cassandra JVM log internal method compilation (developers only) +#-XX:+UnlockDiagnosticVMOptions +#-XX:+LogCompilation + +################# +# HEAP SETTINGS # +################# + +# Heap size is automatically calculated by cassandra-env based on this +# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) +# That is: +# - calculate 1/2 ram and cap to 1024MB +# - calculate 1/4 ram and cap to 8192MB +# - pick the max +# +# For production use you may wish to adjust this for your environment. +# If that's the case, uncomment the -Xmx and Xms options below to override the +# automatic calculation of JVM heap memory. +# +# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to +# the same value to avoid stop-the-world GC pauses during resize, and +# so that we can lock the heap in memory on startup to prevent any +# of it from being swapped out. +#-Xms4G +#-Xmx4G + +# Young generation size is automatically calculated by cassandra-env +# based on this formula: min(100 * num_cores, 1/4 * heap size) +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# It is not recommended to set the young generation size if using the +# G1 GC, since that will override the target pause-time goal. +# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html +# +# The example below assumes a modern 8-core+ machine for decent +# times. If in doubt, and if you do not particularly want to tweak, go +# 100 MB per physical CPU core. +#-Xmn800M + +################################### +# EXPIRATION DATE OVERFLOW POLICY # +################################### + +# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date: +# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00. +# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. +# * CAP_NOWARN: same as previous, except that the client warning will not be emitted. +# +#-Dcassandra.expiration_date_overflow_policy=REJECT diff --git a/etc/cass3/jvm11-clients.options b/etc/cass3/jvm11-clients.options new file mode 100644 index 0000000..c88b7ab --- /dev/null +++ b/etc/cass3/jvm11-clients.options @@ -0,0 +1,29 @@ +########################################################################### +# jvm11-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 11 and newer. # +########################################################################### + +################### +# JPMS SETTINGS # +################### + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + +# The newline in the end of file is intentional diff --git a/etc/cass3/jvm11-server.options b/etc/cass3/jvm11-server.options new file mode 100644 index 0000000..7e78467 --- /dev/null +++ b/etc/cass3/jvm11-server.options @@ -0,0 +1,103 @@ +########################################################################### +# jvm11-server.options # +# # +# See jvm-server.options. This file is specific for Java 11 and newer. # +########################################################################### + +################# +# GC SETTINGS # +################# + + + +### CMS Settings +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + + + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + + +### JPMS + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + + +### GC logging options -- uncomment to enable + +# Java 11 (and newer) GC logging options: +# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax +# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M +#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 + +# Notes for Java 8 migration: +# +# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc" +# -XX:+PrintGCDateStamps maps to decorator 'time' +# +# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' +# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' +# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' +# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' +# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' + +### Netty Options + +# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable +# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has +# inferior performance and risks exceeding MaxDirectMemory +-Dio.netty.tryReflectionSetAccessible=true + +# The newline in the end of file is intentional diff --git a/etc/cass3/jvm8-clients.options b/etc/cass3/jvm8-clients.options new file mode 100644 index 0000000..7d1b2ef --- /dev/null +++ b/etc/cass3/jvm8-clients.options @@ -0,0 +1,9 @@ +########################################################################### +# jvm8-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 8 and newer. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass3/jvm8-server.options b/etc/cass3/jvm8-server.options new file mode 100644 index 0000000..6214669 --- /dev/null +++ b/etc/cass3/jvm8-server.options @@ -0,0 +1,76 @@ +########################################################################### +# jvm8-server.options # +# # +# See jvm-server.options. This file is specific for Java 8 and newer. # +########################################################################### + +######################## +# GENERAL JVM SETTINGS # +######################## + +# allows lowering thread priority without being root on linux - probably +# not necessary on Windows but doesn't harm anything. +# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html +-XX:ThreadPriorityPolicy=42 + +################# +# GC SETTINGS # +################# + +### CMS Settings +-XX:+UseParNewGC +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + +### GC logging options -- uncomment to enable + +-XX:+PrintGCDetails +-XX:+PrintGCDateStamps +-XX:+PrintHeapAtGC +-XX:+PrintTenuringDistribution +-XX:+PrintGCApplicationStoppedTime +-XX:+PrintPromotionFailure +#-XX:PrintFLSStatistics=1 +#-Xloggc:/var/log/cassandra/gc.log +-XX:+UseGCLogFileRotation +-XX:NumberOfGCLogFiles=10 +-XX:GCLogFileSize=10M + +# The newline in the end of file is intentional diff --git a/etc/cass3/logback.xml b/etc/cass3/logback.xml new file mode 100644 index 0000000..e98fea4 --- /dev/null +++ b/etc/cass3/logback.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + INFO + + ${cassandra.logdir}/system.log + + + ${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + ${cassandra.logdir}/debug.log + + + ${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + 1024 + 0 + true + + + + + + + + INFO + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/etc/cass_startup_client/.gitignore b/etc/cass_startup_client/.gitignore new file mode 100644 index 0000000..e81e4e0 --- /dev/null +++ b/etc/cass_startup_client/.gitignore @@ -0,0 +1,11 @@ +# ignore everything +#* + +# except +# .gitignore +# !cassandra.yaml +# !jvm* +# !logback.xml +# !commitlog_archiving.properties +# !cassandra-rackdc.properties +# !cassandra-env.sh \ No newline at end of file diff --git a/etc/cass_startup_client/cassandra-env.sh b/etc/cass_startup_client/cassandra-env.sh new file mode 100644 index 0000000..2e3c8c9 --- /dev/null +++ b/etc/cass_startup_client/cassandra-env.sh @@ -0,0 +1,307 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +calculate_heap_sizes() +{ + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi +} + +# Sets the path where logback and GC logs are written. +if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" +fi + +#GC log path has to be defined here because it needs to access CASSANDRA_HOME +if [ $JAVA_VERSION -ge 11 ] ; then + # See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax + # The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M + echo "$JVM_OPTS" | grep -qe "-[X]log:gc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760" + fi +else + # Java 8 + echo "$JVM_OPTS" | grep -qe "-[X]loggc" + if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line + # only add -Xlog:gc if it's not mentioned in jvm-server.options file + mkdir -p ${CASSANDRA_LOG_DIR} + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + fi +fi + +# Check what parameters were defined on jvm-server.options file to avoid conflicts +echo $JVM_OPTS | grep -q Xmn +DEFINED_XMN=$? +echo $JVM_OPTS | grep -q Xmx +DEFINED_XMX=$? +echo $JVM_OPTS | grep -q Xms +DEFINED_XMS=$? +echo $JVM_OPTS | grep -q UseConcMarkSweepGC +USING_CMS=$? +echo $JVM_OPTS | grep -q +UseG1GC +USING_G1=$? + +# Override these to set the amount of memory to allocate to the JVM at +# start-up. For production use you may wish to adjust this for your +# environment. MAX_HEAP_SIZE is the total amount of memory dedicated +# to the Java heap. HEAP_NEWSIZE refers to the size of the young +# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set +# or not (if you set one, set the other). +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause +# times. If in doubt, and if you do not particularly want to tweak, go with +# 100 MB per physical CPU core. + +#MAX_HEAP_SIZE="4G" +#HEAP_NEWSIZE="800M" + +# Set this to control the amount of arenas per-thread in glibc +#export MALLOC_ARENA_MAX=4 + +# only calculate the size if it's not set manually +if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes +elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 +fi + +if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 +fi + +# We only set -Xms and -Xmx if they were not defined on jvm-server.options file +# If defined, both Xmx and Xms should be defined together. +if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" +elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file." + exit 1 +fi + +# We only set -Xmn flag if it was not defined in jvm-server.options file +# and if the CMS GC is being used +# If defined, both Xmn and Xmx should be defined together. +if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file." + exit 1 +elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" +fi + +# We fail to start if -Xmn is used with G1 GC is being used +# See comments for -Xmn in jvm-server.options +if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then + echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details." + exit 1 +fi + +if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" +fi + +# provides hints to the JIT compiler +JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + +# add the jamm javaagent +JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar" + +# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR +if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" +fi + +# stop the jvm on OutOfMemoryError as it can result in some data corruption +# uncomment the preferred option +# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 +# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words +# on white spaces without taking quotes into account +# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" +# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" +JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + +# print an heap histogram on OutOfMemoryError +# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + +# jmx: metrics and administration interface +# +# add this if you're having trouble connecting: +# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" +# +# see +# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole +# for more on configuring JMX through firewalls, etc. (Short version: +# get it working with no firewall first.) +# +# Cassandra ships with JMX accessible *only* from localhost. +# To enable remote JMX connections, uncomment lines below +# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity +# +if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes +fi + +# Specifies the default port over which Cassandra will be available for +# JMX connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +JMX_PORT="7199" + +if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" +else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + # if ssl is enabled the same port cannot be used for both jmx and rmi so either + # pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins) + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + + # turn on JMX authentication. See below for further options + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true" + + # jmx ssl options + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore" + #JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=" +fi + +# jmx authentication and authorization options. By default, auth is only +# activated for remote connections but they can also be enabled for local only JMX +## Basic file based authn & authz +JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" +#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" +## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. +## JAAS login modules can be used for authentication by uncommenting these two properties. +## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - +## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration +## file cassandra-jaas.config +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" +#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + +## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, +## uncomment this to use it. Requires one of the two authentication options to be enabled +#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + +# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ +# directory. +# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx +# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines +# to control its listen address and port. +#MX4J_ADDRESS="127.0.0.1" +#MX4J_PORT="8081" + +# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 +# for SIGAR we have to set the java.library.path +# to the location of the native libraries. +JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + +if [ "x$MX4J_ADDRESS" != "x" ]; then + if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + else + JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS" + fi +fi +if [ "x$MX4J_PORT" != "x" ]; then + if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then + # Backward compatible with the older style #13578 + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + else + JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT" + fi +fi + +JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" + diff --git a/etc/cass_startup_client/cassandra-rackdc.properties b/etc/cass_startup_client/cassandra-rackdc.properties new file mode 100644 index 0000000..84716f5 --- /dev/null +++ b/etc/cass_startup_client/cassandra-rackdc.properties @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# These properties are used with GossipingPropertyFileSnitch and will +# indicate the rack and dc for this node +dc= Mars +rack= West + +# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch +# to append a string to the EC2 region name. +#dc_suffix= + +# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does. +# prefer_local=true + +# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch. +# Options are: +# legacy : datacenter name is the part of the availability zone name preceding the last "-" +# when the zone ends in -1 and includes the number if not -1. Rack is the portion of +# the availability zone name following the last "-". +# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b; +# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER +# standard : Default value. datacenter name is the standard AWS region name, including the number. +# rack name is the region plus the availability zone letter. +# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b; +# ec2_naming_scheme=standard diff --git a/etc/cass_startup_client/cassandra.yaml b/etc/cass_startup_client/cassandra.yaml new file mode 100644 index 0000000..4baf617 --- /dev/null +++ b/etc/cass_startup_client/cassandra.yaml @@ -0,0 +1,1419 @@ + +# Cassandra storage config YAML + +# NOTE: +# See https://cassandra.apache.org/doc/latest/configuration/ for +# full explanations of configuration directives +# /NOTE + +# The name of the cluster. This is mainly used to prevent machines in +# one logical cluster from joining another. +cluster_name: SolarSystem + +# This defines the number of tokens randomly assigned to this node on the ring +# The more tokens, relative to other nodes, the larger the proportion of data +# that this node will store. You probably want all nodes to have the same number +# of tokens assuming they have equal hardware capability. +# +# If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, +# and will use the initial_token as described below. +# +# Specifying initial_token will override this setting on the node's initial start, +# on subsequent starts, this setting will apply even if initial token is set. +# +# See https://cassandra.apache.org/doc/latest/getting_started/production.html#tokens for +# best practice information about num_tokens. +# +num_tokens: 128 + +# Triggers automatic allocation of num_tokens tokens for this node. The allocation +# algorithm attempts to choose tokens in a way that optimizes replicated load over +# the nodes in the datacenter for the replica factor. +# +# The load assigned to each node will be close to proportional to its number of +# vnodes. +# +# Only supported with the Murmur3Partitioner. + +# Replica factor is determined via the replication strategy used by the specified +# keyspace. +# allocate_tokens_for_keyspace: KEYSPACE + +# Replica factor is explicitly set, regardless of keyspace or datacenter. +# This is the replica factor within the datacenter, like NTS. +allocate_tokens_for_local_replication_factor: 3 + +# initial_token allows you to specify tokens manually. While you can use it with +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# that do not have vnodes enabled. +# initial_token: + +# May either be "true" or "false" to enable globally +hinted_handoff_enabled: true + +# When hinted_handoff_enabled is true, a black list of data centers that will not +# perform hinted handoff +# hinted_handoff_disabled_datacenters: +# - DC1 +# - DC2 + +# this defines the maximum amount of time a dead host will have hints +# generated. After it has been dead this long, new hints for it will not be +# created until it has been seen alive and gone down again. +max_hint_window_in_ms: 10800000 # 3 hours + +# Maximum throttle in KBs per second, per delivery thread. This will be +# reduced proportionally to the number of nodes in the cluster. (If there +# are two nodes in the cluster, each delivery thread will use the maximum +# rate; if there are three, each will throttle to half of the maximum, +# since we expect two nodes to be delivering hints simultaneously.) +hinted_handoff_throttle_in_kb: 1024 + +# Number of threads with which to deliver hints; +# Consider increasing this number when you have multi-dc deployments, since +# cross-dc handoff tends to be slower +max_hints_delivery_threads: 2 + +# Directory where Cassandra should store hints. +# If not set, the default directory is $CASSANDRA_HOME/data/hints. +# hints_directory: /var/lib/cassandra/hints + +# How often hints should be flushed from the internal buffers to disk. +# Will *not* trigger fsync. +hints_flush_period_in_ms: 10000 + +# Maximum size for a single hints file, in megabytes. +max_hints_file_size_in_mb: 128 + +# Compression to apply to the hint files. If omitted, hints files +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +#hints_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Maximum throttle in KBs per second, total. This will be +# reduced proportionally to the number of nodes in the cluster. +batchlog_replay_throttle_in_kb: 1024 + +# Authentication backend, implementing IAuthenticator; used to identify users +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, +# PasswordAuthenticator}. +# +# - AllowAllAuthenticator performs no checks - set it to disable authentication. +# - PasswordAuthenticator relies on username/password pairs to authenticate +# users. It keeps usernames and hashed passwords in system_auth.roles table. +# Please increase system_auth keyspace replication factor if you use this authenticator. +# If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) +authenticator: AllowAllAuthenticator + +# Authorization backend, implementing IAuthorizer; used to limit access/provide permissions +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, +# CassandraAuthorizer}. +# +# - AllowAllAuthorizer allows any action to any user - set it to disable authorization. +# - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +authorizer: AllowAllAuthorizer + +# Part of the Authentication & Authorization backend, implementing IRoleManager; used +# to maintain grants and memberships between roles. +# Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, +# which stores role information in the system_auth keyspace. Most functions of the +# IRoleManager require an authenticated login, so unless the configured IAuthenticator +# actually implements authentication, most of this functionality will be unavailable. +# +# - CassandraRoleManager stores role data in the system_auth keyspace. Please +# increase system_auth keyspace replication factor if you use this role manager. +role_manager: CassandraRoleManager + +# Network authorization backend, implementing INetworkAuthorizer; used to restrict user +# access to certain DCs +# Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllNetworkAuthorizer, +# CassandraNetworkAuthorizer}. +# +# - AllowAllNetworkAuthorizer allows access to any DC to any user - set it to disable authorization. +# - CassandraNetworkAuthorizer stores permissions in system_auth.network_permissions table. Please +# increase system_auth keyspace replication factor if you use this authorizer. +network_authorizer: AllowAllNetworkAuthorizer + +# Validity period for roles cache (fetching granted roles can be an expensive +# operation depending on the role manager, CassandraRoleManager is one example) +# Granted roles are cached for authenticated sessions in AuthenticatedUser and +# after the period specified here, become eligible for (async) reload. +# Defaults to 2000, set to 0 to disable caching entirely. +# Will be disabled automatically for AllowAllAuthenticator. +roles_validity_in_ms: 2000 + +# Refresh interval for roles cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If roles_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as roles_validity_in_ms. +# roles_update_interval_in_ms: 2000 + +# Validity period for permissions cache (fetching permissions can be an +# expensive operation depending on the authorizer, CassandraAuthorizer is +# one example). Defaults to 2000, set to 0 to disable. +# Will be disabled automatically for AllowAllAuthorizer. +permissions_validity_in_ms: 2000 + +# Refresh interval for permissions cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If permissions_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as permissions_validity_in_ms. +# permissions_update_interval_in_ms: 2000 + +# Validity period for credentials cache. This cache is tightly coupled to +# the provided PasswordAuthenticator implementation of IAuthenticator. If +# another IAuthenticator implementation is configured, this cache will not +# be automatically used and so the following settings will have no effect. +# Please note, credentials are cached in their encrypted form, so while +# activating this cache may reduce the number of queries made to the +# underlying table, it may not bring a significant reduction in the +# latency of individual authentication attempts. +# Defaults to 2000, set to 0 to disable credentials caching. +credentials_validity_in_ms: 2000 + +# Refresh interval for credentials cache (if enabled). +# After this interval, cache entries become eligible for refresh. Upon next +# access, an async reload is scheduled and the old value returned until it +# completes. If credentials_validity_in_ms is non-zero, then this must be +# also. +# Defaults to the same value as credentials_validity_in_ms. +# credentials_update_interval_in_ms: 2000 + +# The partitioner is responsible for distributing groups of rows (by +# partition key) across nodes in the cluster. The partitioner can NOT be +# changed without reloading all data. If you are adding nodes or upgrading, +# you should set this to the same partitioner that you are currently using. +# +# The default partitioner is the Murmur3Partitioner. Older partitioners +# such as the RandomPartitioner, ByteOrderedPartitioner, and +# OrderPreservingPartitioner have been included for backward compatibility only. +# For new clusters, you should NOT change this value. +# +partitioner: org.apache.cassandra.dht.Murmur3Partitioner + +# Directories where Cassandra should store data on disk. If multiple +# directories are specified, Cassandra will spread data evenly across +# them by partitioning the token ranges. +# If not set, the default directory is $CASSANDRA_HOME/data/data. +# data_file_directories: +# - /var/lib/cassandra/data + +# Directory were Cassandra should store the data of the local system keyspaces. +# By default Cassandra will store the data of the local system keyspaces in the first of the data directories specified +# by data_file_directories. +# This approach ensures that if one of the other disks is lost Cassandra can continue to operate. For extra security +# this setting allows to store those data on a different directory that provides redundancy. +# local_system_data_file_directory: + +# commit log. when running on magnetic HDD, this should be a +# separate spindle than the data directories. +# If not set, the default directory is $CASSANDRA_HOME/data/commitlog. +# commitlog_directory: /var/lib/cassandra/commitlog + +# Enable / disable CDC functionality on a per-node basis. This modifies the logic used +# for write path allocation rejection (standard: never reject. cdc: reject Mutation +# containing a CDC-enabled table if at space limit in cdc_raw_directory). +cdc_enabled: false + +# CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the +# segment contains mutations for a CDC-enabled table. This should be placed on a +# separate spindle than the data directories. If not set, the default directory is +# $CASSANDRA_HOME/data/cdc_raw. +# cdc_raw_directory: /var/lib/cassandra/cdc_raw + +# Policy for data disk failures: +# +# die +# shut down gossip and client transports and kill the JVM for any fs errors or +# single-sstable errors, so the node can be replaced. +# +# stop_paranoid +# shut down gossip and client transports even for single-sstable errors, +# kill the JVM for errors during startup. +# +# stop +# shut down gossip and client transports, leaving the node effectively dead, but +# can still be inspected via JMX, kill the JVM for errors during startup. +# +# best_effort +# stop using the failed disk and respond to requests based on +# remaining available sstables. This means you WILL see obsolete +# data at CL.ONE! +# +# ignore +# ignore fatal errors and let requests fail, as in pre-1.2 Cassandra +disk_failure_policy: stop + +# Policy for commit disk failures: +# +# die +# shut down the node and kill the JVM, so the node can be replaced. +# +# stop +# shut down the node, leaving the node effectively dead, but +# can still be inspected via JMX. +# +# stop_commit +# shutdown the commit log, letting writes collect but +# continuing to service reads, as in pre-2.0.5 Cassandra +# +# ignore +# ignore fatal errors and let the batches fail +commit_failure_policy: stop + +# Maximum size of the native protocol prepared statement cache +# +# Valid values are either "auto" (omitting the value) or a value greater 0. +# +# Note that specifying a too large value will result in long running GCs and possbily +# out-of-memory errors. Keep the value at a small fraction of the heap. +# +# If you constantly see "prepared statements discarded in the last minute because +# cache limit reached" messages, the first step is to investigate the root cause +# of these messages and check whether prepared statements are used correctly - +# i.e. use bind markers for variable parts. +# +# Do only change the default value, if you really have more prepared statements than +# fit in the cache. In most cases it is not neccessary to change this value. +# Constantly re-preparing statements is a performance penalty. +# +# Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater +prepared_statements_cache_size_mb: + +# Maximum size of the key cache in memory. +# +# Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the +# minimum, sometimes more. The key cache is fairly tiny for the amount of +# time it saves, so it's worthwhile to use it at large numbers. +# The row cache saves even more time, but must contain the entire row, +# so it is extremely space-intensive. It's best to only use the +# row cache if you have hot rows or static rows. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. +key_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the key cache. Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 14400 or 4 hours. +key_cache_save_period: 14400 + +# Number of keys from the key cache to save +# Disabled by default, meaning all keys are going to be saved +# key_cache_keys_to_save: 100 + +# Row cache implementation class name. Available implementations: +# +# org.apache.cassandra.cache.OHCProvider +# Fully off-heap row cache implementation (default). +# +# org.apache.cassandra.cache.SerializingCacheProvider +# This is the row cache implementation availabile +# in previous releases of Cassandra. +# row_cache_class_name: org.apache.cassandra.cache.OHCProvider + +# Maximum size of the row cache in memory. +# Please note that OHC cache implementation requires some additional off-heap memory to manage +# the map structures and some in-flight memory during operations before/after cache entries can be +# accounted against the cache capacity. This overhead is usually small compared to the whole capacity. +# Do not specify more memory that the system can afford in the worst usual situation and leave some +# headroom for OS block level cache. Do never allow your system to swap. +# +# Default value is 0, to disable row caching. +row_cache_size_in_mb: 0 + +# Duration in seconds after which Cassandra should save the row cache. +# Caches are saved to saved_caches_directory as specified in this configuration file. +# +# Saved caches greatly improve cold-start speeds, and is relatively cheap in +# terms of I/O for the key cache. Row cache saving is much more expensive and +# has limited use. +# +# Default is 0 to disable saving the row cache. +row_cache_save_period: 0 + +# Number of keys from the row cache to save. +# Specify 0 (which is the default), meaning all keys are going to be saved +# row_cache_keys_to_save: 100 + +# Maximum size of the counter cache in memory. +# +# Counter cache helps to reduce counter locks' contention for hot counter cells. +# In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before +# write entirely. With RF > 1 a counter cache hit will still help to reduce the duration +# of the lock hold, helping with hot counter cell updates, but will not allow skipping +# the read entirely. Only the local (clock, count) tuple of a counter cell is kept +# in memory, not the whole counter, so it's relatively cheap. +# +# NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. +# +# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. +# NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. +counter_cache_size_in_mb: + +# Duration in seconds after which Cassandra should +# save the counter cache (keys only). Caches are saved to saved_caches_directory as +# specified in this configuration file. +# +# Default is 7200 or 2 hours. +counter_cache_save_period: 7200 + +# Number of keys from the counter cache to save +# Disabled by default, meaning all keys are going to be saved +# counter_cache_keys_to_save: 100 + +# saved caches +# If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. +# saved_caches_directory: /var/lib/cassandra/saved_caches + +# Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting +# the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup +# while still having the cache during runtime. +# cache_load_timeout_seconds: 30 + +# commitlog_sync may be either "periodic", "group", or "batch." +# +# When in batch mode, Cassandra won't ack writes until the commit log +# has been flushed to disk. Each incoming write will trigger the flush task. +# commitlog_sync_batch_window_in_ms is a deprecated value. Previously it had +# almost no value, and is being removed. +# +# commitlog_sync_batch_window_in_ms: 2 +# +# group mode is similar to batch mode, where Cassandra will not ack writes +# until the commit log has been flushed to disk. The difference is group +# mode will wait up to commitlog_sync_group_window_in_ms between flushes. +# +# commitlog_sync_group_window_in_ms: 1000 +# +# the default option is "periodic" where writes may be acked immediately +# and the CommitLog is simply synced every commitlog_sync_period_in_ms +# milliseconds. +commitlog_sync: periodic +commitlog_sync_period_in_ms: 10000 + +# When in periodic commitlog mode, the number of milliseconds to block writes +# while waiting for a slow disk flush to complete. +# periodic_commitlog_sync_lag_block_in_ms: + +# The size of the individual commitlog file segments. A commitlog +# segment may be archived, deleted, or recycled once all the data +# in it (potentially from each columnfamily in the system) has been +# flushed to sstables. +# +# The default size is 32, which is almost always fine, but if you are +# archiving commitlog segments (see commitlog_archiving.properties), +# then you probably want a finer granularity of archiving; 8 or 16 MB +# is reasonable. +# Max mutation size is also configurable via max_mutation_size_in_kb setting in +# cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. +# This should be positive and less than 2048. +# +# NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must +# be set to at least twice the size of max_mutation_size_in_kb / 1024 +# +commitlog_segment_size_in_mb: 32 + +# Compression to apply to the commit log. If omitted, the commit log +# will be written uncompressed. LZ4, Snappy, and Deflate compressors +# are supported. +# commitlog_compression: +# - class_name: LZ4Compressor +# parameters: +# - + +# Compression to apply to SSTables as they flush for compressed tables. +# Note that tables without compression enabled do not respect this flag. +# +# As high ratio compressors like LZ4HC, Zstd, and Deflate can potentially +# block flushes for too long, the default is to flush with a known fast +# compressor in those cases. Options are: +# +# none : Flush without compressing blocks but while still doing checksums. +# fast : Flush with a fast compressor. If the table is already using a +# fast compressor that compressor is used. +# table: Always flush with the same compressor that the table uses. This +# was the pre 4.0 behavior. +# +# flush_compression: fast + +# any class that implements the SeedProvider interface and has a +# constructor that takes a Map of parameters will do. +seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "cass1,cass2" + +# For workloads with more data than can fit in memory, Cassandra's +# bottleneck will be reads that need to fetch data from +# disk. "concurrent_reads" should be set to (16 * number_of_drives) in +# order to allow the operations to enqueue low enough in the stack +# that the OS and drives can reorder them. Same applies to +# "concurrent_counter_writes", since counter writes read the current +# values before incrementing and writing them back. +# +# On the other hand, since writes are almost never IO bound, the ideal +# number of "concurrent_writes" is dependent on the number of cores in +# your system; (8 * number_of_cores) is a good rule of thumb. +concurrent_reads: 32 +concurrent_writes: 32 +concurrent_counter_writes: 32 + +# For materialized view writes, as there is a read involved, so this should +# be limited by the less of concurrent reads or concurrent writes. +concurrent_materialized_view_writes: 32 + +# Maximum memory to use for inter-node and client-server networking buffers. +# +# Defaults to the smaller of 1/16 of heap or 128MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# networking_cache_size_in_mb: 128 + +# Enable the sstable chunk cache. The chunk cache will store recently accessed +# sections of the sstable in-memory as uncompressed buffers. +# file_cache_enabled: false + +# Maximum memory to use for sstable chunk cache and buffer pooling. +# 32MB of this are reserved for pooling buffers, the rest is used for chunk cache +# that holds uncompressed sstable chunks. +# Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, +# so is in addition to the memory allocated for heap. The cache also has on-heap +# overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size +# if the default 64k chunk size is used). +# Memory is only allocated when needed. +# file_cache_size_in_mb: 512 + +# Flag indicating whether to allocate on or off heap when the sstable buffer +# pool is exhausted, that is when it has exceeded the maximum memory +# file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + +# buffer_pool_use_heap_if_exhausted: true + +# The strategy for optimizing disk read +# Possible values are: +# ssd (for solid state disks, the default) +# spinning (for spinning disks) +# disk_optimization_strategy: ssd + +# Total permitted memory to use for memtables. Cassandra will stop +# accepting writes when the limit is exceeded until a flush completes, +# and will trigger a flush based on memtable_cleanup_threshold +# If omitted, Cassandra will set both to 1/4 the size of the heap. +# memtable_heap_space_in_mb: 2048 +# memtable_offheap_space_in_mb: 2048 + +# memtable_cleanup_threshold is deprecated. The default calculation +# is the only reasonable choice. See the comments on memtable_flush_writers +# for more information. +# +# Ratio of occupied non-flushing memtable size to total permitted size +# that will trigger a flush of the largest memtable. Larger mct will +# mean larger flushes and hence less compaction, but also less concurrent +# flush activity which can make it difficult to keep your disks fed +# under heavy write load. +# +# memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) +# memtable_cleanup_threshold: 0.11 + +# Specify the way Cassandra allocates and manages memtable memory. +# Options are: +# +# heap_buffers +# on heap nio buffers +# +# offheap_buffers +# off heap (direct) nio buffers +# +# offheap_objects +# off heap objects +memtable_allocation_type: heap_buffers + +# Limit memory usage for Merkle tree calculations during repairs. The default +# is 1/16th of the available heap. The main tradeoff is that smaller trees +# have less resolution, which can lead to over-streaming data. If you see heap +# pressure during repairs, consider lowering this, but you cannot go below +# one megabyte. If you see lots of over-streaming, consider raising +# this or using subrange repair. +# +# For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. +# +# repair_session_space_in_mb: + +# Total space to use for commit logs on disk. +# +# If space gets above this value, Cassandra will flush every dirty CF +# in the oldest segment and remove it. So a small total commitlog space +# will tend to cause more flush activity on less-active columnfamilies. +# +# The default value is the smaller of 8192, and 1/4 of the total space +# of the commitlog volume. +# +# commitlog_total_space_in_mb: 8192 + +# This sets the number of memtable flush writer threads per disk +# as well as the total number of memtables that can be flushed concurrently. +# These are generally a combination of compute and IO bound. +# +# Memtable flushing is more CPU efficient than memtable ingest and a single thread +# can keep up with the ingest rate of a whole server on a single fast disk +# until it temporarily becomes IO bound under contention typically with compaction. +# At that point you need multiple flush threads. At some point in the future +# it may become CPU bound all the time. +# +# You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation +# metric which should be 0, but will be non-zero if threads are blocked waiting on flushing +# to free memory. +# +# memtable_flush_writers defaults to two for a single data directory. +# This means that two memtables can be flushed concurrently to the single data directory. +# If you have multiple data directories the default is one memtable flushing at a time +# but the flush will use a thread per data directory so you will get two or more writers. +# +# Two is generally enough to flush on a fast disk [array] mounted as a single data directory. +# Adding more flush writers will result in smaller more frequent flushes that introduce more +# compaction overhead. +# +# There is a direct tradeoff between number of memtables that can be flushed concurrently +# and flush size and frequency. More is not better you just need enough flush writers +# to never stall waiting for flushing to free memory. +# +#memtable_flush_writers: 2 + +# Total space to use for change-data-capture logs on disk. +# +# If space gets above this value, Cassandra will throw WriteTimeoutException +# on Mutations including tables with CDC enabled. A CDCCompactor is responsible +# for parsing the raw CDC logs and deleting them when parsing is completed. +# +# The default value is the min of 4096 mb and 1/8th of the total space +# of the drive where cdc_raw_directory resides. +# cdc_total_space_in_mb: 4096 + +# When we hit our cdc_raw limit and the CDCCompactor is either running behind +# or experiencing backpressure, we check at the following interval to see if any +# new space for cdc-tracked tables has been made available. Default to 250ms +# cdc_free_space_check_interval_ms: 250 + +# A fixed memory pool size in MB for for SSTable index summaries. If left +# empty, this will default to 5% of the heap size. If the memory usage of +# all index summaries exceeds this limit, SSTables with low read rates will +# shrink their index summaries in order to meet this limit. However, this +# is a best-effort process. In extreme conditions Cassandra may need to use +# more than this amount of memory. +index_summary_capacity_in_mb: + +# How frequently index summaries should be resampled. This is done +# periodically to redistribute memory from the fixed-size pool to sstables +# proportional their recent read rates. Setting to -1 will disable this +# process, leaving existing index summaries at their current sampling level. +index_summary_resize_interval_in_minutes: 60 + +# Whether to, when doing sequential writing, fsync() at intervals in +# order to force the operating system to flush the dirty +# buffers. Enable this to avoid sudden dirty buffer flushing from +# impacting read latencies. Almost always a good idea on SSDs; not +# necessarily on platters. +trickle_fsync: false +trickle_fsync_interval_in_kb: 10240 + +# TCP port, for commands and data +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +storage_port: 7000 + +# SSL port, for legacy encrypted communication. This property is unused unless enabled in +# server_encryption_options (see below). As of cassandra 4.0, this property is deprecated +# as a single port can be used for either/both secure and insecure connections. +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +ssl_storage_port: 7001 + +# Address or interface to bind to and tell other Cassandra nodes to connect to. +# You _must_ change this if you want multiple nodes to be able to communicate! +# +# Set listen_address OR listen_interface, not both. +# +# Leaving it blank leaves it up to InetAddress.getLocalHost(). This +# will always do the Right Thing _if_ the node is properly configured +# (hostname, name resolution, etc), and the Right Thing is to use the +# address associated with the hostname (it might not be). If unresolvable +# it will fall back to InetAddress.getLoopbackAddress(), which is wrong for production systems. +# +# Setting listen_address to 0.0.0.0 is always wrong. +# +listen_address: 172.20.0.6 + +# Set listen_address OR listen_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# listen_interface: eth0 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# listen_interface_prefer_ipv6: false + +# Address to broadcast to other Cassandra nodes +# Leaving this blank will set it to the same value as listen_address +broadcast_address: 172.20.0.6 + +# When using multiple physical network interfaces, set this +# to true to listen on broadcast_address in addition to +# the listen_address, allowing nodes to communicate in both +# interfaces. +# Ignore this property if the network configuration automatically +# routes between the public and private networks such as EC2. +# listen_on_broadcast_address: false + +# Internode authentication backend, implementing IInternodeAuthenticator; +# used to allow/disallow connections from peer nodes. +# internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + +# Whether to start the native transport server. +# The address on which the native transport is bound is defined by rpc_address. +start_native_transport: true +# port for the CQL native transport to listen for clients on +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +native_transport_port: 9042 +# Enabling native transport encryption in client_encryption_options allows you to either use +# encryption for the standard port or to use a dedicated, additional port along with the unencrypted +# standard native_transport_port. +# Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption +# for native_transport_port. Setting native_transport_port_ssl to a different value +# from native_transport_port will use encryption for native_transport_port_ssl while +# keeping native_transport_port unencrypted. +# native_transport_port_ssl: 9142 +# The maximum threads for handling requests (note that idle threads are stopped +# after 30 seconds so there is not corresponding minimum setting). +# native_transport_max_threads: 128 +# +# The maximum size of allowed frame. Frame (requests) larger than this will +# be rejected as invalid. The default is 256MB. If you're changing this parameter, +# you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. +# native_transport_max_frame_size_in_mb: 256 + +# The maximum number of concurrent client connections. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections: -1 + +# The maximum number of concurrent client connections per source ip. +# The default is -1, which means unlimited. +# native_transport_max_concurrent_connections_per_ip: -1 + +# Controls whether Cassandra honors older, yet currently supported, protocol versions. +# The default is true, which means all supported protocols will be honored. +native_transport_allow_older_protocols: true + +# Controls when idle client connections are closed. Idle connections are ones that had neither reads +# nor writes for a time period. +# +# Clients may implement heartbeats by sending OPTIONS native protocol message after a timeout, which +# will reset idle timeout timer on the server side. To close idle client connections, corresponding +# values for heartbeat intervals have to be set on the client side. +# +# Idle connection timeouts are disabled by default. +# native_transport_idle_timeout_in_ms: 60000 + +# The address or interface to bind the native transport server to. +# +# Set rpc_address OR rpc_interface, not both. +# +# Leaving rpc_address blank has the same effect as on listen_address +# (i.e. it will be based on the configured hostname of the node). +# +# Note that unlike listen_address, you can specify 0.0.0.0, but you must also +# set broadcast_rpc_address to a value other than 0.0.0.0. +# +# For security reasons, you should not expose this port to the internet. Firewall it if needed. +rpc_address: 0.0.0.0 + +# Set rpc_address OR rpc_interface, not both. Interfaces must correspond +# to a single address, IP aliasing is not supported. +# rpc_interface: eth1 + +# If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address +# you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 +# address will be used. If true the first ipv6 address will be used. Defaults to false preferring +# ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. +# rpc_interface_prefer_ipv6: false + +# RPC address to broadcast to drivers and other Cassandra nodes. This cannot +# be set to 0.0.0.0. If left blank, this will be set to the value of +# rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must +# be set. +broadcast_rpc_address: 172.20.0.6 + +# enable or disable keepalive on rpc/native connections +rpc_keepalive: true + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# See also: +# /proc/sys/net/core/wmem_max +# /proc/sys/net/core/rmem_max +# /proc/sys/net/ipv4/tcp_wmem +# /proc/sys/net/ipv4/tcp_wmem +# and 'man tcp' +# internode_socket_send_buffer_size_in_bytes: + +# Uncomment to set socket buffer size for internode communication +# Note that when setting this, the buffer size is limited by net.core.wmem_max +# and when not setting it it is defined by net.ipv4.tcp_wmem +# internode_socket_receive_buffer_size_in_bytes: + +# Set to true to have Cassandra create a hard link to each sstable +# flushed or streamed locally in a backups/ subdirectory of the +# keyspace data. Removing these links is the operator's +# responsibility. +incremental_backups: false + +# Whether or not to take a snapshot before each compaction. Be +# careful using this option, since Cassandra won't clean up the +# snapshots for you. Mostly useful if you're paranoid when there +# is a data format change. +snapshot_before_compaction: false + +# Whether or not a snapshot is taken of the data before keyspace truncation +# or dropping of column families. The STRONGLY advised default of true +# should be used to provide data safety. If you set this flag to false, you will +# lose data on truncation or drop. +auto_snapshot: true + +# The act of creating or clearing a snapshot involves creating or removing +# potentially tens of thousands of links, which can cause significant performance +# impact, especially on consumer grade SSDs. A non-zero value here can +# be used to throttle these links to avoid negative performance impact of +# taking and clearing snapshots +snapshot_links_per_second: 0 + +# Granularity of the collation index of rows within a partition. +# Increase if your rows are large, or if you have a very large +# number of rows per partition. The competing goals are these: +# +# - a smaller granularity means more index entries are generated +# and looking up rows withing the partition by collation column +# is faster +# - but, Cassandra will keep the collation index in memory for hot +# rows (as part of the key cache), so a larger granularity means +# you can cache more hot rows +column_index_size_in_kb: 64 + +# Per sstable indexed key cache entries (the collation index in memory +# mentioned above) exceeding this size will not be held on heap. +# This means that only partition information is held on heap and the +# index entries are read from disk. +# +# Note that this size refers to the size of the +# serialized index information and not the size of the partition. +column_index_cache_size_in_kb: 2 + +# Number of simultaneous compactions to allow, NOT including +# validation "compactions" for anti-entropy repair. Simultaneous +# compactions can help preserve read performance in a mixed read/write +# workload, by mitigating the tendency of small sstables to accumulate +# during a single long running compactions. The default is usually +# fine and if you experience problems with compaction running too +# slowly or too fast, you should look at +# compaction_throughput_mb_per_sec first. +# +# concurrent_compactors defaults to the smaller of (number of disks, +# number of cores), with a minimum of 2 and a maximum of 8. +# +# If your data directories are backed by SSD, you should increase this +# to the number of cores. +#concurrent_compactors: 1 + +# Number of simultaneous repair validations to allow. If not set or set to +# a value less than 1, it defaults to the value of concurrent_compactors. +# To set a value greeater than concurrent_compactors at startup, the system +# property cassandra.allow_unlimited_concurrent_validations must be set to +# true. To dynamically resize to a value > concurrent_compactors on a running +# node, first call the bypassConcurrentValidatorsLimit method on the +# org.apache.cassandra.db:type=StorageService mbean +# concurrent_validations: 0 + +# Number of simultaneous materialized view builder tasks to allow. +concurrent_materialized_view_builders: 1 + +# Throttles compaction to the given total throughput across the entire +# system. The faster you insert data, the faster you need to compact in +# order to keep the sstable count down, but in general, setting this to +# 16 to 32 times the rate you are inserting data is more than sufficient. +# Setting this to 0 disables throttling. Note that this accounts for all types +# of compaction, including validation compaction (building Merkle trees +# for repairs). +compaction_throughput_mb_per_sec: 64 + +# When compacting, the replacement sstable(s) can be opened before they +# are completely written, and used in place of the prior sstables for +# any range that has been written. This helps to smoothly transfer reads +# between the sstables, reducing page cache churn and keeping hot rows hot +sstable_preemptive_open_interval_in_mb: 50 + +# When enabled, permits Cassandra to zero-copy stream entire eligible +# SSTables between nodes, including every component. +# This speeds up the network transfer significantly subject to +# throttling specified by stream_throughput_outbound_megabits_per_sec. +# Enabling this will reduce the GC pressure on sending and receiving node. +# When unset, the default is enabled. While this feature tries to keep the +# disks balanced, it cannot guarantee it. This feature will be automatically +# disabled if internode encryption is enabled. +# stream_entire_sstables: true + +# Throttles all outbound streaming file transfers on this node to the +# given total throughput in Mbps. This is necessary because Cassandra does +# mostly sequential IO when streaming data during bootstrap or repair, which +# can lead to saturating the network connection and degrading rpc performance. +# When unset, the default is 200 Mbps or 25 MB/s. +# stream_throughput_outbound_megabits_per_sec: 200 + +# Throttles all streaming file transfer between the datacenters, +# this setting allows users to throttle inter dc stream throughput in addition +# to throttling all network stream traffic as configured with +# stream_throughput_outbound_megabits_per_sec +# When unset, the default is 200 Mbps or 25 MB/s +# inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + +# Server side timeouts for requests. The server will return a timeout exception +# to the client if it can't complete an operation within the corresponding +# timeout. Those settings are a protection against: +# 1) having client wait on an operation that might never terminate due to some +# failures. +# 2) operations that use too much CPU/read too much data (leading to memory build +# up) by putting a limit to how long an operation will execute. +# For this reason, you should avoid putting these settings too high. In other words, +# if you are timing out requests because of underlying resource constraints then +# increasing the timeout will just cause more problems. Of course putting them too +# low is equally ill-advised since clients could get timeouts even for successful +# operations just because the timeout setting is too tight. + +# How long the coordinator should wait for read operations to complete. +# Lowest acceptable value is 10 ms. +read_request_timeout_in_ms: 5000 +# How long the coordinator should wait for seq or index scans to complete. +# Lowest acceptable value is 10 ms. +range_request_timeout_in_ms: 10000 +# How long the coordinator should wait for writes to complete. +# Lowest acceptable value is 10 ms. +write_request_timeout_in_ms: 2000 +# How long the coordinator should wait for counter writes to complete. +# Lowest acceptable value is 10 ms. +counter_write_request_timeout_in_ms: 5000 +# How long a coordinator should continue to retry a CAS operation +# that contends with other proposals for the same row. +# Lowest acceptable value is 10 ms. +cas_contention_timeout_in_ms: 1000 +# How long the coordinator should wait for truncates to complete +# (This can be much longer, because unless auto_snapshot is disabled +# we need to flush first so we can snapshot before removing the data.) +# Lowest acceptable value is 10 ms. +truncate_request_timeout_in_ms: 60000 +# The default timeout for other, miscellaneous operations. +# Lowest acceptable value is 10 ms. +request_timeout_in_ms: 10000 + +# Defensive settings for protecting Cassandra from true network partitions. +# See (CASSANDRA-14358) for details. +# +# The amount of time to wait for internode tcp connections to establish. +# internode_tcp_connect_timeout_in_ms: 2000 +# +# The amount of time unacknowledged data is allowed on a connection before we throw out the connection +# Note this is only supported on Linux + epoll, and it appears to behave oddly above a setting of 30000 +# (it takes much longer than 30s) as of Linux 4.12. If you want something that high set this to 0 +# which picks up the OS default and configure the net.ipv4.tcp_retries2 sysctl to be ~8. +# internode_tcp_user_timeout_in_ms: 30000 + +# The amount of time unacknowledged data is allowed on a streaming connection. +# The default is 5 minutes. Increase it or set it to 0 in order to increase the timeout. +# internode_streaming_tcp_user_timeout_in_ms: 300000 + +# Global, per-endpoint and per-connection limits imposed on messages queued for delivery to other nodes +# and waiting to be processed on arrival from other nodes in the cluster. These limits are applied to the on-wire +# size of the message being sent or received. +# +# The basic per-link limit is consumed in isolation before any endpoint or global limit is imposed. +# Each node-pair has three links: urgent, small and large. So any given node may have a maximum of +# N*3*(internode_application_send_queue_capacity_in_bytes+internode_application_receive_queue_capacity_in_bytes) +# messages queued without any coordination between them although in practice, with token-aware routing, only RF*tokens +# nodes should need to communicate with significant bandwidth. +# +# The per-endpoint limit is imposed on all messages exceeding the per-link limit, simultaneously with the global limit, +# on all links to or from a single node in the cluster. +# The global limit is imposed on all messages exceeding the per-link limit, simultaneously with the per-endpoint limit, +# on all links to or from any node in the cluster. +# +# internode_application_send_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_send_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_send_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB +# internode_application_receive_queue_capacity_in_bytes: 4194304 #4MiB +# internode_application_receive_queue_reserve_endpoint_capacity_in_bytes: 134217728 #128MiB +# internode_application_receive_queue_reserve_global_capacity_in_bytes: 536870912 #512MiB + + +# How long before a node logs slow queries. Select queries that take longer than +# this timeout to execute, will generate an aggregated log message, so that slow queries +# can be identified. Set this value to zero to disable slow query logging. +slow_query_log_timeout_in_ms: 500 + +# Enable operation timeout information exchange between nodes to accurately +# measure request timeouts. If disabled, replicas will assume that requests +# were forwarded to them instantly by the coordinator, which means that +# under overload conditions we will waste that much extra time processing +# already-timed-out requests. +# +# Warning: It is generally assumed that users have setup NTP on their clusters, and that clocks are modestly in sync, +# since this is a requirement for general correctness of last write wins. +#cross_node_timeout: true + +# Set keep-alive period for streaming +# This node will send a keep-alive message periodically with this period. +# If the node does not receive a keep-alive message from the peer for +# 2 keep-alive cycles the stream session times out and fail +# Default value is 300s (5 minutes), which means stalled stream +# times out in 10 minutes by default +# streaming_keep_alive_period_in_secs: 300 + +# Limit number of connections per host for streaming +# Increase this when you notice that joins are CPU-bound rather that network +# bound (for example a few nodes with big files). +# streaming_connections_per_host: 1 + + +# phi value that must be reached for a host to be marked down. +# most users should never need to adjust this. +# phi_convict_threshold: 8 + +# endpoint_snitch -- Set this to a class that implements +# IEndpointSnitch. The snitch has two functions: +# +# - it teaches Cassandra enough about your network topology to route +# requests efficiently +# - it allows Cassandra to spread replicas around your cluster to avoid +# correlated failures. It does this by grouping machines into +# "datacenters" and "racks." Cassandra will do its best not to have +# more than one replica on the same "rack" (which may not actually +# be a physical location) +# +# CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH +# ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. +# This means that if you start with the default SimpleSnitch, which +# locates every node on "rack1" in "datacenter1", your only options +# if you need to add another datacenter are GossipingPropertyFileSnitch +# (and the older PFS). From there, if you want to migrate to an +# incompatible snitch like Ec2Snitch you can do it by adding new nodes +# under Ec2Snitch (which will locate them in a new "datacenter") and +# decommissioning the old ones. +# +# Out of the box, Cassandra provides: +# +# SimpleSnitch: +# Treats Strategy order as proximity. This can improve cache +# locality when disabling read repair. Only appropriate for +# single-datacenter deployments. +# +# GossipingPropertyFileSnitch +# This should be your go-to snitch for production use. The rack +# and datacenter for the local node are defined in +# cassandra-rackdc.properties and propagated to other nodes via +# gossip. If cassandra-topology.properties exists, it is used as a +# fallback, allowing migration from the PropertyFileSnitch. +# +# PropertyFileSnitch: +# Proximity is determined by rack and data center, which are +# explicitly configured in cassandra-topology.properties. +# +# Ec2Snitch: +# Appropriate for EC2 deployments in a single Region. Loads Region +# and Availability Zone information from the EC2 API. The Region is +# treated as the datacenter, and the Availability Zone as the rack. +# Only private IPs are used, so this will not work across multiple +# Regions. +# +# Ec2MultiRegionSnitch: +# Uses public IPs as broadcast_address to allow cross-region +# connectivity. (Thus, you should set seed addresses to the public +# IP as well.) You will need to open the storage_port or +# ssl_storage_port on the public IP firewall. (For intra-Region +# traffic, Cassandra will switch to the private IP after +# establishing a connection.) +# +# RackInferringSnitch: +# Proximity is determined by rack and data center, which are +# assumed to correspond to the 3rd and 2nd octet of each node's IP +# address, respectively. Unless this happens to match your +# deployment conventions, this is best used as an example of +# writing a custom Snitch class and is provided in that spirit. +# +# You can use a custom Snitch by setting this to the full class name +# of the snitch, which will be assumed to be on your classpath. +endpoint_snitch: GossipingPropertyFileSnitch + +# controls how often to perform the more expensive part of host score +# calculation +dynamic_snitch_update_interval_in_ms: 100 +# controls how often to reset all host scores, allowing a bad host to +# possibly recover +dynamic_snitch_reset_interval_in_ms: 600000 +# if set greater than zero, this will allow +# 'pinning' of replicas to hosts in order to increase cache capacity. +# The badness threshold will control how much worse the pinned host has to be +# before the dynamic snitch will prefer other replicas over it. This is +# expressed as a double which represents a percentage. Thus, a value of +# 0.2 means Cassandra would continue to prefer the static snitch values +# until the pinned host was 20% worse than the fastest. +dynamic_snitch_badness_threshold: 1.0 + +# Configure server-to-server internode encryption +# +# JVM and netty defaults for supported SSL socket protocols and cipher suites can +# be replaced using custom encryption options. This is not recommended +# unless you have policies in place that dictate certain settings, or +# need to disable vulnerable ciphers or protocols in case the JVM cannot +# be updated. +# +# FIPS compliant settings can be configured at JVM level and should not +# involve changing encryption settings here: +# https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable server-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set internode_encryption= and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +server_encryption_options: + # On outbound connections, determine which type of peers to securely connect to. + # The available options are : + # none : Do not encrypt outgoing connections + # dc : Encrypt connections to peers in other datacenters but not within datacenters + # rack : Encrypt connections to peers in other racks but not within racks + # all : Always use encrypted connections + internode_encryption: none + # When set to true, encrypted and unencrypted connections are allowed on the storage_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true if internode_encryption is none + # optional: true + # If enabled, will open up an encrypted listening socket on ssl_storage_port. Should only be used + # during upgrade to 4.0; otherwise, set to false. + enable_legacy_ssl_storage_port: false + # Set to a valid keystore if internode_encryption is dc, rack or all + keystore: conf/.keystore + keystore_password: cassandra + # Verify peer server certificates + require_client_auth: false + # Set to a valid trustore if require_client_auth is true + truststore: conf/.truststore + truststore_password: cassandra + # Verify that the host name in the certificate matches the connected host + require_endpoint_verification: false + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# Configure client-to-server encryption. +# +# **NOTE** this default configuration is an insecure configuration. If you need to +# enable client-to-server encryption generate server keystores (and truststores for mutual +# authentication) per: +# http://download.oracle.com/javase/8/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore +# Then perform the following configuration changes: +# +# Step 1: Set enabled=true and explicitly set optional=true. Restart all nodes +# +# Step 2: Set optional=false (or remove it) and if you generated truststores and want to use mutual +# auth set require_client_auth=true. Restart all nodes +client_encryption_options: + # Enable client-to-server encryption + enabled: false + # When set to true, encrypted and unencrypted connections are allowed on the native_transport_port + # This should _only be true_ while in unencrypted or transitional operation + # optional defaults to true when enabled is false, and false when enabled is true. + # optional: true + # Set keystore and keystore_password to valid keystores if enabled is true + keystore: conf/.keystore + keystore_password: cassandra + # Verify client certificates + require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults: + # protocol: TLS + # store_type: JKS + # cipher_suites: [ + # TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + # TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, + # TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, + # TLS_RSA_WITH_AES_256_CBC_SHA + # ] + +# internode_compression controls whether traffic between nodes is +# compressed. +# Can be: +# +# all +# all traffic is compressed +# +# dc +# traffic between different datacenters is compressed +# +# none +# nothing is compressed. +internode_compression: dc + +# Enable or disable tcp_nodelay for inter-dc communication. +# Disabling it will result in larger (but fewer) network packets being sent, +# reducing overhead from the TCP protocol itself, at the cost of increasing +# latency if you block for cross-datacenter responses. +inter_dc_tcp_nodelay: false + +# TTL for different trace types used during logging of the repair process. +tracetype_query_ttl: 86400 +tracetype_repair_ttl: 604800 + +# If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at +# INFO level +# UDFs (user defined functions) are disabled by default. +# As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. +enable_user_defined_functions: false + +# Enables scripted UDFs (JavaScript UDFs). +# Java UDFs are always enabled, if enable_user_defined_functions is true. +# Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. +# This option has no effect, if enable_user_defined_functions is false. +enable_scripted_user_defined_functions: false + +# The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. +# Lowering this value on Windows can provide much tighter latency and better throughput, however +# some virtualized environments may see a negative performance impact from changing this setting +# below their system default. The sysinternals 'clockres' tool can confirm your system's default +# setting. +windows_timer_interval: 1 + + +# Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from +# a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by +# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys +# can still (and should!) be in the keystore and will be used on decrypt operations +# (to handle the case of key rotation). +# +# It is strongly recommended to download and install Java Cryptography Extension (JCE) +# Unlimited Strength Jurisdiction Policy Files for your version of the JDK. +# (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) +# +# Currently, only the following file types are supported for transparent data encryption, although +# more are coming in future cassandra releases: commitlog, hints +transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + +##################### +# SAFETY THRESHOLDS # +##################### + +# When executing a scan, within or across a partition, we need to keep the +# tombstones seen in memory so we can return them to the coordinator, which +# will use them to make sure other replicas also know about the deleted rows. +# With workloads that generate a lot of tombstones, this can cause performance +# problems and even exaust the server heap. +# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) +# Adjust the thresholds here if you understand the dangers and want to +# scan more tombstones anyway. These thresholds may also be adjusted at runtime +# using the StorageService mbean. +tombstone_warn_threshold: 1000 +tombstone_failure_threshold: 100000 + +# Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a +# mechanism called replica filtering protection to ensure that results from stale replicas do +# not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This +# mechanism materializes replica results by partition on-heap at the coordinator. The more possibly +# stale results returned by the replicas, the more rows materialized during the query. +replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + +# Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. +# Caution should be taken on increasing the size of this threshold as it can lead to node instability. +batch_size_warn_threshold_in_kb: 5 + +# Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. +batch_size_fail_threshold_in_kb: 50 + +# Log WARN on any batches not of type LOGGED than span across more partitions than this limit +unlogged_batch_across_partitions_warn_threshold: 10 + +# Log a warning when compacting partitions larger than this value +compaction_large_partition_warning_threshold_mb: 100 + +# GC Pauses greater than 200 ms will be logged at INFO level +# This threshold can be adjusted to minimize logging if necessary +# gc_log_threshold_in_ms: 200 + +# GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level +# Adjust the threshold based on your application throughput requirement. Setting to 0 +# will deactivate the feature. +# gc_warn_threshold_in_ms: 1000 + +# Maximum size of any value in SSTables. Safety measure to detect SSTable corruption +# early. Any value size larger than this threshold will result into marking an SSTable +# as corrupted. This should be positive and less than 2048. +# max_value_size_in_mb: 256 + +# Track a metric per keyspace indicating whether replication achieved the ideal consistency +# level for writes without timing out. This is different from the consistency level requested by +# each write which may be lower in order to facilitate availability. +# ideal_consistency_level: EACH_QUORUM + +# Automatically upgrade sstables after upgrade - if there is no ordinary compaction to do, the +# oldest non-upgraded sstable will get upgraded to the latest version +# automatic_sstable_upgrade: false +# Limit the number of concurrent sstable upgrades +# max_concurrent_automatic_sstable_upgrades: 1 + +# Audit logging - Logs every incoming CQL command request, authentication to a node. See the docs +# on audit_logging for full details about the various configuration options. +audit_logging_options: + enabled: false + logger: + - class_name: BinAuditLogger + # audit_logs_dir: + # included_keyspaces: + # excluded_keyspaces: system, system_schema, system_virtual_schema + # included_categories: + # excluded_categories: + # included_users: + # excluded_users: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + + +# default options for full query logging - these can be overridden from command line when executing +# nodetool enablefullquerylog +#full_query_logging_options: + # log_dir: + # roll_cycle: HOURLY + # block: true + # max_queue_weight: 268435456 # 256 MiB + # max_log_size: 17179869184 # 16 GiB + ## archive command is "/path/to/script.sh %path" where %path is replaced with the file being rolled: + # archive_command: + # max_archive_retries: 10 + +# validate tombstones on reads and compaction +# can be either "disabled", "warn" or "exception" +# corrupted_tombstone_strategy: disabled + +# Diagnostic Events # +# If enabled, diagnostic events can be helpful for troubleshooting operational issues. Emitted events contain details +# on internal state and temporal relationships across events, accessible by clients via JMX. +diagnostic_events_enabled: false + +# Use native transport TCP message coalescing. If on upgrade to 4.0 you found your throughput decreasing, and in +# particular you run an old kernel or have very fewer client connections, this option might be worth evaluating. +#native_transport_flush_in_batches_legacy: false + +# Enable tracking of repaired state of data during reads and comparison between replicas +# Mismatches between the repaired sets of replicas can be characterized as either confirmed +# or unconfirmed. In this context, unconfirmed indicates that the presence of pending repair +# sessions, unrepaired partition tombstones, or some other condition means that the disparity +# cannot be considered conclusive. Confirmed mismatches should be a trigger for investigation +# as they may be indicative of corruption or data loss. +# There are separate flags for range vs partition reads as single partition reads are only tracked +# when CL > 1 and a digest mismatch occurs. Currently, range queries don't use digests so if +# enabled for range reads, all range reads will include repaired data tracking. As this adds +# some overhead, operators may wish to disable it whilst still enabling it for partition reads +repaired_data_tracking_for_range_reads_enabled: false +repaired_data_tracking_for_partition_reads_enabled: false +# If false, only confirmed mismatches will be reported. If true, a separate metric for unconfirmed +# mismatches will also be recorded. This is to avoid potential signal:noise issues are unconfirmed +# mismatches are less actionable than confirmed ones. +report_unconfirmed_repaired_data_mismatches: false + +# Having many tables and/or keyspaces negatively affects performance of many operations in the +# cluster. When the number of tables/keyspaces in the cluster exceeds the following thresholds +# a client warning will be sent back to the user when creating a table or keyspace. +# table_count_warn_threshold: 150 +# keyspace_count_warn_threshold: 40 + +######################### +# EXPERIMENTAL FEATURES # +######################### + +# Enables materialized view creation on this node. +# Materialized views are considered experimental and are not recommended for production use. +enable_materialized_views: true + +# Enables SASI index creation on this node. +# SASI indexes are considered experimental and are not recommended for production use. +enable_sasi_indexes: false + +# Enables creation of transiently replicated keyspaces on this node. +# Transient replication is experimental and is not recommended for production use. +enable_transient_replication: false + +# Enables the used of 'ALTER ... DROP COMPACT STORAGE' statements on this node. +# 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use. +enable_drop_compact_storage: false diff --git a/etc/cass_startup_client/commitlog_archiving.properties b/etc/cass_startup_client/commitlog_archiving.properties new file mode 100644 index 0000000..393259c --- /dev/null +++ b/etc/cass_startup_client/commitlog_archiving.properties @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# commitlog archiving configuration. Leave blank to disable. + +# Command to execute to archive a commitlog segment +# Parameters: %path => Fully qualified path of the segment to archive +# %name => Name of the commit log. +# Example: archive_command=/bin/ln %path /backup/%name +# +# Limitation: *_command= expects one command with arguments. STDOUT +# and STDIN or multiple commands cannot be executed. You might want +# to script multiple commands and add a pointer here. +archive_command= + +# Command to execute to make an archived commitlog live again. +# Parameters: %from is the full path to an archived commitlog segment (from restore_directories) +# %to is the live commitlog directory +# Example: restore_command=/bin/cp -f %from %to +restore_command= + +# Directory to scan the recovery files in. +restore_directories= + +# Restore mutations created up to and including this timestamp in GMT. +# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12) +# +# Recovery will continue through the segment when the first client-supplied +# timestamp greater than this time is encountered, but only mutations less than +# or equal to this timestamp will be applied. +restore_point_in_time= + +# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...) +precision=MICROSECONDS diff --git a/etc/cass_startup_client/jvm-clients.options b/etc/cass_startup_client/jvm-clients.options new file mode 100644 index 0000000..6181ed0 --- /dev/null +++ b/etc/cass_startup_client/jvm-clients.options @@ -0,0 +1,10 @@ +########################################################################### +# jvm-clients.options # +# # +# See jvm8-clients.options and jvm11-clients.options for Java version # +# specific options. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass_startup_client/jvm-server.options b/etc/cass_startup_client/jvm-server.options new file mode 100644 index 0000000..46967f4 --- /dev/null +++ b/etc/cass_startup_client/jvm-server.options @@ -0,0 +1,188 @@ +########################################################################### +# jvm-server.options # +# # +# - all flags defined here will be used by cassandra to startup the JVM # +# - one flag should be specified per line # +# - lines that do not start with '-' will be ignored # +# - only static flags are accepted (no variables or parameters) # +# - dynamic flags will be appended to these on cassandra-env # +# # +# See jvm8-server.options and jvm11-server.options for Java version # +# specific options. # +########################################################################### + +###################### +# STARTUP PARAMETERS # +###################### + +# Uncomment any of the following properties to enable specific startup parameters + +# In a multi-instance deployment, multiple Cassandra instances will independently assume that all +# CPU processors are available to it. This setting allows you to specify a smaller set of processors +# and perhaps have affinity. +#-Dcassandra.available_processors=number_of_processors + +# The directory location of the cassandra.yaml file. +#-Dcassandra.config=directory + +# Sets the initial partitioner token for a node the first time the node is started. +#-Dcassandra.initial_token=token + +# Set to false to start Cassandra on a node but not have the node join the cluster. +#-Dcassandra.join_ring=true|false + +# Set to false to clear all gossip state for the node on restart. Use when you have changed node +# information in cassandra.yaml (such as listen_address). +#-Dcassandra.load_ring_state=true|false + +# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. +#-Dcassandra.metricsReporterConfigFile=file + +# Set the port on which the CQL native transport listens for clients. (Default: 9042) +#-Dcassandra.native_transport_port=port + +# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) +#-Dcassandra.partitioner=partitioner + +# To replace a node that has died, restart a new node in its place specifying the address of the +# dead node. The new node must not have any data in its data directory, that is, it must be in the +# same state as before bootstrapping. +#-Dcassandra.replace_address=listen_address or broadcast_address of dead node + +# Allow restoring specific tables from an archived commit log. +#-Dcassandra.replayList=table + +# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits +# before joining the ring. +#-Dcassandra.ring_delay_ms=ms + +# Set the SSL port for encrypted communication. (Default: 7001) +#-Dcassandra.ssl_storage_port=port + +# Set the port for inter-node communication. (Default: 7000) +#-Dcassandra.storage_port=port + +# Set the default location for the trigger JARs. (Default: conf/triggers) +#-Dcassandra.triggers_dir=directory + +# For testing new compaction and compression strategies. It allows you to experiment with different +# strategies and benchmark write performance differences without affecting the production workload. +#-Dcassandra.write_survey=true + +# To disable configuration via JMX of auth caches (such as those for credentials, permissions and +# roles). This will mean those config options can only be set (persistently) in cassandra.yaml +# and will require a restart for new values to take effect. +#-Dcassandra.disable_auth_caches_remote_configuration=true + +# To disable dynamic calculation of the page size used when indexing an entire partition (during +# initial index build/rebuild). If set to true, the page size will be fixed to the default of +# 10000 rows per page. +#-Dcassandra.force_default_indexing_page_size=true + +# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds +#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds + +######################## +# GENERAL JVM SETTINGS # +######################## + +# enable assertions. highly suggested for correct application functionality. +-ea + +# disable assertions for net.openhft.** because it runs out of memory by design +# if enabled and run for more than just brief testing +-da:net.openhft... + +# enable thread priorities, primarily so we can give periodic tasks +# a lower priority to avoid interfering with client workload +-XX:+UseThreadPriorities + +# Enable heap-dump if there's an OOM +-XX:+HeapDumpOnOutOfMemoryError + +# Per-thread stack size. +-Xss256k + +# Make sure all memory is faulted and zeroed on startup. +# This helps prevent soft faults in containers and makes +# transparent hugepage allocation more effective. +-XX:+AlwaysPreTouch + +# Disable biased locking as it does not benefit Cassandra. +-XX:-UseBiasedLocking + +# Enable thread-local allocation blocks and allow the JVM to automatically +# resize them at runtime. +-XX:+UseTLAB +-XX:+ResizeTLAB +-XX:+UseNUMA + +# http://www.evanjones.ca/jvm-mmap-pause.html +-XX:+PerfDisableSharedMem + +# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See +# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: +# comment out this entry to enable IPv6 support). +-Djava.net.preferIPv4Stack=true + +### Debug options + +# uncomment to enable flight recorder +#-XX:+UnlockCommercialFeatures +#-XX:+FlightRecorder + +# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 +#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 + +# uncomment to have Cassandra JVM log internal method compilation (developers only) +#-XX:+UnlockDiagnosticVMOptions +#-XX:+LogCompilation + +################# +# HEAP SETTINGS # +################# + +# Heap size is automatically calculated by cassandra-env based on this +# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) +# That is: +# - calculate 1/2 ram and cap to 1024MB +# - calculate 1/4 ram and cap to 8192MB +# - pick the max +# +# For production use you may wish to adjust this for your environment. +# If that's the case, uncomment the -Xmx and Xms options below to override the +# automatic calculation of JVM heap memory. +# +# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to +# the same value to avoid stop-the-world GC pauses during resize, and +# so that we can lock the heap in memory on startup to prevent any +# of it from being swapped out. +#-Xms4G +#-Xmx4G + +# Young generation size is automatically calculated by cassandra-env +# based on this formula: min(100 * num_cores, 1/4 * heap size) +# +# The main trade-off for the young generation is that the larger it +# is, the longer GC pause times will be. The shorter it is, the more +# expensive GC will be (usually). +# +# It is not recommended to set the young generation size if using the +# G1 GC, since that will override the target pause-time goal. +# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html +# +# The example below assumes a modern 8-core+ machine for decent +# times. If in doubt, and if you do not particularly want to tweak, go +# 100 MB per physical CPU core. +#-Xmn800M + +################################### +# EXPIRATION DATE OVERFLOW POLICY # +################################### + +# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date: +# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00. +# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. +# * CAP_NOWARN: same as previous, except that the client warning will not be emitted. +# +#-Dcassandra.expiration_date_overflow_policy=REJECT diff --git a/etc/cass_startup_client/jvm11-clients.options b/etc/cass_startup_client/jvm11-clients.options new file mode 100644 index 0000000..c88b7ab --- /dev/null +++ b/etc/cass_startup_client/jvm11-clients.options @@ -0,0 +1,29 @@ +########################################################################### +# jvm11-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 11 and newer. # +########################################################################### + +################### +# JPMS SETTINGS # +################### + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + +# The newline in the end of file is intentional diff --git a/etc/cass_startup_client/jvm11-server.options b/etc/cass_startup_client/jvm11-server.options new file mode 100644 index 0000000..7e78467 --- /dev/null +++ b/etc/cass_startup_client/jvm11-server.options @@ -0,0 +1,103 @@ +########################################################################### +# jvm11-server.options # +# # +# See jvm-server.options. This file is specific for Java 11 and newer. # +########################################################################### + +################# +# GC SETTINGS # +################# + + + +### CMS Settings +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + + + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + + +### JPMS + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + + +### GC logging options -- uncomment to enable + +# Java 11 (and newer) GC logging options: +# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax +# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M +#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 + +# Notes for Java 8 migration: +# +# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc" +# -XX:+PrintGCDateStamps maps to decorator 'time' +# +# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' +# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' +# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' +# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' +# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' + +### Netty Options + +# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable +# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has +# inferior performance and risks exceeding MaxDirectMemory +-Dio.netty.tryReflectionSetAccessible=true + +# The newline in the end of file is intentional diff --git a/etc/cass_startup_client/jvm8-clients.options b/etc/cass_startup_client/jvm8-clients.options new file mode 100644 index 0000000..7d1b2ef --- /dev/null +++ b/etc/cass_startup_client/jvm8-clients.options @@ -0,0 +1,9 @@ +########################################################################### +# jvm8-clients.options # +# # +# See jvm-clients.options. This file is specific for Java 8 and newer. # +########################################################################### + +# intentionally left empty + +# The newline in the end of file is intentional diff --git a/etc/cass_startup_client/jvm8-server.options b/etc/cass_startup_client/jvm8-server.options new file mode 100644 index 0000000..6214669 --- /dev/null +++ b/etc/cass_startup_client/jvm8-server.options @@ -0,0 +1,76 @@ +########################################################################### +# jvm8-server.options # +# # +# See jvm-server.options. This file is specific for Java 8 and newer. # +########################################################################### + +######################## +# GENERAL JVM SETTINGS # +######################## + +# allows lowering thread priority without being root on linux - probably +# not necessary on Windows but doesn't harm anything. +# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html +-XX:ThreadPriorityPolicy=42 + +################# +# GC SETTINGS # +################# + +### CMS Settings +-XX:+UseParNewGC +-XX:+UseConcMarkSweepGC +-XX:+CMSParallelRemarkEnabled +-XX:SurvivorRatio=8 +-XX:MaxTenuringThreshold=1 +-XX:CMSInitiatingOccupancyFraction=75 +-XX:+UseCMSInitiatingOccupancyOnly +-XX:CMSWaitDuration=10000 +-XX:+CMSParallelInitialMarkEnabled +-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +-XX:+CMSClassUnloadingEnabled + +### G1 Settings +## Use the Hotspot garbage-first collector. +#-XX:+UseG1GC +#-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. +#-XX:ConcGCThreads=16 + +### GC logging options -- uncomment to enable + +-XX:+PrintGCDetails +-XX:+PrintGCDateStamps +-XX:+PrintHeapAtGC +-XX:+PrintTenuringDistribution +-XX:+PrintGCApplicationStoppedTime +-XX:+PrintPromotionFailure +#-XX:PrintFLSStatistics=1 +#-Xloggc:/var/log/cassandra/gc.log +-XX:+UseGCLogFileRotation +-XX:NumberOfGCLogFiles=10 +-XX:GCLogFileSize=10M + +# The newline in the end of file is intentional diff --git a/etc/cass_startup_client/logback.xml b/etc/cass_startup_client/logback.xml new file mode 100644 index 0000000..e98fea4 --- /dev/null +++ b/etc/cass_startup_client/logback.xml @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + INFO + + ${cassandra.logdir}/system.log + + + ${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + ${cassandra.logdir}/debug.log + + + ${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip + + 50MB + 7 + 5GB + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + 1024 + 0 + true + + + + + + + + INFO + + + %-5level [%thread] %date{ISO8601} %F:%L - %msg%n + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/startup/queries/Dockerfile b/startup/queries/Dockerfile new file mode 100644 index 0000000..b1d81ff --- /dev/null +++ b/startup/queries/Dockerfile @@ -0,0 +1,4 @@ +FROM python:3 +ADD exercise2_3.py / +RUN pip install cassandra-driver +CMD [ "python3", "./exercise2_3.py" ] \ No newline at end of file diff --git a/startup/queries/exercise2_3.py b/startup/queries/exercise2_3.py new file mode 100644 index 0000000..796930b --- /dev/null +++ b/startup/queries/exercise2_3.py @@ -0,0 +1,24 @@ +from collections import OrderedDict +from collections import Counter +from cassandra.cluster import Cluster + +cluster = Cluster(['172.20.0.6', '172.20.0.7', '172.20.0.8']) +session = cluster.connect('twitter') + +# 2. Find the 100 accounts with the most followers +rows = session.execute('SELECT user_id,follower_len FROM twitter.most_follows;') +sorted_rows = dict(sorted(dict(rows).items(), key=lambda item: item[1])) +most_follows = list(sorted_rows.keys())[-100:] +print("Top 100 most followed accounts:",most_follows) + +print("________________________________________________________________") + +# Finding the 100 accounts that follow the most of the accounts found in 2). +followed_accs = session.execute(f'SELECT follower_id FROM twitter.follower_relation WHERE user_id IN {tuple(most_follows)};') +# followed_accs = list(followed_accs.keys()) +_list = list() +for row in followed_accs: + _list.append(row[0]) + +followed_top_100 = Counter(_list).most_common(100) +print("100 accounts that follow the most of the accounts found in 2)",followed_top_100) diff --git a/startup/queries/twitter_queries.cql b/startup/queries/twitter_queries.cql new file mode 100644 index 0000000..d5bb412 --- /dev/null +++ b/startup/queries/twitter_queries.cql @@ -0,0 +1,44 @@ +// 1. Auflisten der Posts, die von einem Account gemacht wurden, bzw. ihm zugeordnet wurden +SELECT content FROM twitter.tweets where author_id = X; +// or +SELECT * FROM twitter.tweets WHERE author='katyperry' ALLOW FILTERING; + +// 2 .Finden der 100 Accounts mit den meisten Followern + +// VIEW not possible -> no group by allowed AND YOU CANT SORT DATA https://stackoverflow.com/a/14463098 +// CREATE MATERIALIZED VIEW twitter.most_follows AS SELECT user_id, count(user_id) as len from twitter.user group by user_id PRIMARY KEY (user_id,len); +// # Use of spark cluster or trigger to update tables to add the lenght of follower ids. -> New Data Schema +// # not supported on counter table -> CREATE MATERIALIZED VIEW twitter.most_follows AS SELECT user_id, follower_len from twitter.user_stats PRIMARY KEY ((follower_len,user_id)); +// change to int +CREATE MATERIALIZED VIEW twitter.most_follows AS SELECT user_id, follower_len from twitter.user_stats WHERE user_id is not null and follower_len is not null PRIMARY KEY (follower_len,user_id); + +// 3. Finden der 100 Accounts, die den meisten der Accounts folgen, die in 2) gefunden wurden + +// 4. +// die Anzahl der Follower & die Anzahl der verfolgten Accounts +SELECT * FROM twitter.user_stats WHERE user_id = 14378300; +// 25 neusten +CREATE MATERIALIZED VIEW twitter.start_view_new AS SELECT user_id_x,follower_id,date_time,name,author,content,id FROM twitter.user WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND date_time IS NOT NULL AND id IS NOT NULL PRIMARY KEY ((user_id_x),date_time,follower_id,id); +SELECT * FROM twitter.start_view WHERE user_id_x = 14378300 ORDER BY date_time DESC LIMIT 25; +// 25 beliebtesten +CREATE MATERIALIZED VIEW twitter.start_view_like AS SELECT user_id_x,follower_id,number_of_likes,date_time,author,name,content,id FROM twitter.user WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); +SELECT * FROM twitter.start_view_like WHERE user_id_x = 14378300 ORDER BY number_of_likes DESC LIMIT 25; + +// 5. Fan-Out as VIEW ? +// # InvalidRequest: Error from server: code=2200 [Invalid query] message="Unknown column 'user_id_x' referenced in PRIMARY KEY for materialized view 'start_view_biber'" +CREATE MATERIALIZED VIEW twitter.start_view_biber AS SELECT follower_id,number_of_likes,date_time,author,name,content,id FROM twitter.user WHERE user_id_x IS NOT NULL AND user_id_x=172883064 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); +// # order by need partion key in WHERE +SELECT * from twitter.start_view_biber WHERE user_id_x=14378300 ORDER BY number_of_likes DESC LIMIT 25; +// INSERT new tweet +INSERT INTO twitter.user(user_id_x,follower_id,name,author ,content ,country ,date_time ,id ,language ,latitude ,longitude ,number_of_likes ,number_of_shares ,user_id_y ) +VALUES(14378300, 261047860, 'Justin','NoName', 'Hallo there BDEA','DE', dateof(now()), 'NoID', 'text', 100, 100, 10000000, 0, 0); + +// 6. Auflisten Post die ein geg. Wort enthalten (falls möglich auch mit UND-Verknüpfung mehrerer Worte) +// # Enables SASI index creation on this node && SASI indexes are considered experimental and are not recommended for production use. +// # InvalidRequest: Error from server: code=2200 [Invalid query] message="Secondary indexes on materialized views aren't supported" +// # SyntaxException: line 1:7 no viable alternative at input 'SEARCH' ([CREATE] SEARCH...) +CREATE CUSTOM INDEX search_in ON twitter.tweets (content) USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = { 'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', 'case_sensitive': 'false' }; +SELECT * from twitter.tweets WHERE content LIKE '%This%' limit 25; + + + diff --git a/startup/setup/cleaning.py b/startup/setup/cleaning.py new file mode 100644 index 0000000..82d410f --- /dev/null +++ b/startup/setup/cleaning.py @@ -0,0 +1,83 @@ +from ctypes import sizeof +from datetime import date +import pandas as pd +import numpy as np + +# read follwer +df_follower = pd.read_csv("cassandra/startup/data/twitter_combined_orginal.txt", sep=' ', names=["user_id","follower_id"]) +df_follower = df_follower.drop_duplicates() # drop duplicate follows in data +df_user_stats = df_follower.groupby('user_id')['follower_id'].size().reset_index(name='follower_len') # group user_id to follows -> user is following +df_user_stats['follows_len'] = df_follower.groupby('follower_id')['user_id'].size().reset_index(name='follows_len')['follows_len'] # group follows to user_id -> user are followed by +df_user_follow_list = df_follower.groupby('user_id')['follower_id'].apply(list).reset_index(name='follows') # group follows to user_id -> user are followed by +df_user_follow_list['follower'] = df_follower.groupby('follower_id')['user_id'].apply(list).reset_index(name='follower')['follower'] # group follows to user_id -> user are followed by +df_user_stats['follows_len'] = df_follower.groupby('follower_id')['user_id'].size().reset_index(name='follows_len')['follows_len'] # group follows to user_id -> user are followed by + +df_follower.to_csv("cassandra/startup/data/user_follower_relation.csv",index=False) + +# change comment section in twitter.csv because \n and \r are not functionally quoteted +df_tweet = pd.read_csv("cassandra/startup/data/tweets_orginal.csv") +df_tweet['content'] = df_tweet["content"].str.replace("\n","\\n") +df_tweet['content'] = df_tweet["content"].str.replace("\r","\\r") +df_tweet['content'] = df_tweet["content"].str.replace(',',"\,") + +### map user_ids to the tweets +# list of distinct authors +authors = df_tweet['author'].drop_duplicates().reset_index(drop=True) + +# sort user after the most followers and get random authors ids +df_follower['len'] = df_follower.groupby('user_id')['follower_id'].apply(list).str.len() +df_user_id = df_follower.sort_values(by='len', ascending=False)['user_id'][:100].sample(n=len(authors)).reset_index(drop=True) +df_follower = df_follower.drop(columns='len') + +# merge the author_names and id into the user table +authors = pd.DataFrame(authors) +authors['user_id'] = df_user_id +df_tweet['user_id'] = df_tweet.merge(authors, on='author').user_id +df_follower['name'] = df_follower.merge(authors,how='left', on='user_id')['author'].values +tweets_len = df_tweet.groupby('user_id')['id'].size().reset_index(name='tweets_len').astype(int) +df_user_stats['tweets_len'] = df_user_stats.merge(tweets_len,how='left', on='user_id')['tweets_len'].values + +# add liked from user to each tweet +user_ids = pd.DataFrame(df_follower['user_id'].drop_duplicates()) +num_of_ids_missing = int(df_tweet['number_of_likes'].max()/10) - user_ids.values.size +# generate new user if most liked tweet are more than the user number +if num_of_ids_missing > 0: + a = np.empty((num_of_ids_missing,2,)) + a[:] = np.nan + new_user_ids = np.arange(user_ids.max()+1,user_ids.max()+1+num_of_ids_missing) + a[:,0] = new_user_ids + user_ids = pd.concat([pd.DataFrame(a, columns=user_ids.columns), user_ids], ignore_index=True) + user_ids['user_id'] = user_ids['user_id'].astype(np.uint32) + +# sort random user_id to all tweets +tweet_ids = pd.DataFrame(df_tweet['id'].drop_duplicates()) +tweet_ids['liked_from'] = df_tweet.apply(lambda row: list(np.random.choice(user_ids['user_id'],size = int(row['number_of_likes']/10))),axis=1) +tweet_ids['date_time'] = df_tweet['date_time'] + +# split into separte files +partitions = 7 +dfs = np.array_split(tweet_ids, partitions) +for i,df in enumerate(dfs): + df.to_csv("cassandra/startup/data/tweet_liked/tweet"+str(i)+".txt",index=False) + + +# save the updated data +df_tweet.to_csv("cassandra/startup/data/tweets.csv",index=False) +df_user_stats.to_csv("cassandra/startup/data/user_stats.txt",index=False) +df_user_follow_list.to_csv("cassandra/startup/data/user_follows.txt",index=False) + +# save realtionship betweet user_ID, follower_ID and tweet_ID +relation_list = list() +for i,row in df_tweet.iterrows(): + df = df_tweet.iloc[[i]] + relation_list.append(df_follower.merge(df,left_on='follower_id',right_on='user_id')) + if i % 1000 == 0: + df_follower_new = pd.concat(relation_list) + df_follower_new.to_csv("cassandra/startup/data/relations/relation"+str(i)+".txt",index=False) + relation_list = list() + + + + + + diff --git a/startup/setup/setup_db.sh b/startup/setup/setup_db.sh new file mode 100644 index 0000000..9fe20b6 --- /dev/null +++ b/startup/setup/setup_db.sh @@ -0,0 +1,11 @@ +#!/bin/bash +cqlsh cass1 -f /tmp/startup/setup/setup_db_1.cql +cqlsh cass1 -f /tmp/startup/setup/setup_db_2.cql +# NOTE : Quote it else use array to avoid problems # +# :1:Failed to import 20 rows: WriteTimeout - Error from server: code=1100 [Coordinator node timed out waiting for replica nodes' responses] message="Operation timed out - received only 0 responses." info={'consistency': 'ONE', 'required_responses': 1, 'received_responses': 0, 'write_type': 'UNLOGGED_BATCH'}, will retry later, attempt 1 of 5 +TWEET_FILES="/tmp/startup/data/relations/*" +for f in $TWEET_FILES; do + cqlsh cass1 -e "COPY twitter.user (user_id_x,follower_id,name,author,content,country,date_time,id,language,latitude,longitude,number_of_likes,number_of_shares,user_id_y) FROM '$f' WITH DELIMITER=',' AND HEADER=TRUE;" +done + +cqlsh cass1 -f /tmp/startup/setup/setup_views.cql diff --git a/startup/setup/setup_db_1.cql b/startup/setup/setup_db_1.cql new file mode 100644 index 0000000..398ecfe --- /dev/null +++ b/startup/setup/setup_db_1.cql @@ -0,0 +1,25 @@ +CREATE KEYSPACE twitter WITH replication = {'class':'NetworkTopologyStrategy', 'replication_factor' : 3}; + +CREATE TABLE twitter.tweets( +author text, +content text, +country text, +date_time timestamp, +id text, +language text, +latitude double, +longitude double, +number_of_likes int, +number_of_shares int, +author_id bigint, +PRIMARY KEY ((author_id), date_time, id) +) WITH compaction = {'class' : 'LeveledCompactionStrategy'}; + +CREATE TABLE twitter.follower_relation( +user_id int, +follower_id int, +PRIMARY KEY (user_id,follower_id) +) WITH compaction = {'class' : 'LeveledCompactionStrategy'}; + +COPY twitter.tweets (author,content,country,date_time,id,language,latitude,longitude,number_of_likes,number_of_shares, author_id) FROM '/tmp/startup/data/tweets.csv' WITH DELIMITER=',' AND HEADER=TRUE; +COPY twitter.follower_relation (user_id,follower_id) FROM '/tmp/startup/data/user_follower_relation.csv' WITH DELIMITER=',' AND HEADER=TRUE; diff --git a/startup/setup/setup_db_2.cql b/startup/setup/setup_db_2.cql new file mode 100644 index 0000000..f29c232 --- /dev/null +++ b/startup/setup/setup_db_2.cql @@ -0,0 +1,37 @@ +CREATE TABLE twitter.user( +user_id_x int, +follower_id int, +name text, +author text, +content text, +country text, +date_time timestamp, +id text, +language text, +latitude double, +longitude double, +number_of_likes int, +number_of_shares int, +user_id_y int, +PRIMARY KEY((user_id_x,follower_id,id)) +) WITH compaction = {'class' : 'LeveledCompactionStrategy'}; + + +CREATE TABLE twitter.tweet_liked_from( +tweet_id text, +date_time timestamp, +liked_from set, +PRIMARY KEY(tweet_id,date_time) +) WITH compaction = {'class' : 'LeveledCompactionStrategy'}; + + +CREATE TABLE twitter.user_stats( +user_id bigint, +follower_len int, +follows_len int, +tweet_len int, +PRIMARY KEY(user_id) +) WITH compaction = {'class' : 'LeveledCompactionStrategy'}; + +COPY twitter.user_stats (user_id,follower_len,follows_len,tweet_len) FROM '/tmp/startup/data/user_stats.txt' WITH DELIMITER=',' AND HEADER=TRUE; + diff --git a/startup/setup/setup_likedFrom.sh b/startup/setup/setup_likedFrom.sh new file mode 100644 index 0000000..fc68bfa --- /dev/null +++ b/startup/setup/setup_likedFrom.sh @@ -0,0 +1,5 @@ +#!/bin/bash +LIKED_FROM_FILES="/tmp/startup/data/tweet_liked/*" +for f in $LIKED_FROM_FILES; do + cqlsh cass1 -e "COPY twitter.tweet_liked_from (tweet_id,liked_from,date_time) FROM '$f' WITH DELIMITER=',' AND HEADER=TRUE;" +done diff --git a/startup/setup/setup_views.cql b/startup/setup/setup_views.cql new file mode 100644 index 0000000..629d1b6 --- /dev/null +++ b/startup/setup/setup_views.cql @@ -0,0 +1,28 @@ +CREATE MATERIALIZED VIEW twitter.start_view_like AS + SELECT user_id_x,follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id + FROM twitter.user + WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL +PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); + +CREATE MATERIALIZED VIEW twitter.start_view_new AS + SELECT user_id_x,follower_id,date_time,number_of_likes,number_of_shares,name,author,content,id + FROM twitter.user + WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND date_time IS NOT NULL AND id IS NOT NULL +PRIMARY KEY ((user_id_x),date_time,follower_id,id); + +CREATE MATERIALIZED VIEW twitter.start_view_taylor AS + SELECT follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id,user_id_x + FROM twitter.user + WHERE user_id_x IS NOT NULL AND user_id_x=233248636 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL +PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); + +CREATE MATERIALIZED VIEW twitter.start_view_user1 AS + SELECT follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id,user_id_x + FROM twitter.user + WHERE user_id_x IS NOT NULL AND user_id_x=172883064 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL +PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id); + +CREATE CUSTOM INDEX search_in + ON twitter.tweets (content) + USING 'org.apache.cassandra.index.sasi.SASIIndex' + WITH OPTIONS = { 'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', 'case_sensitive': 'false' };