We did this all in one go

master
Jonathan Seltmann 2024-06-29 11:29:35 +02:00
commit 90cec75cde
80 changed files with 12722 additions and 0 deletions

1
.gitignore vendored 100644
View File

@ -0,0 +1 @@
data

107
README.md 100644
View File

@ -0,0 +1,107 @@
## Cassandra
Command can be run on the Cassandra Web at `localhost:3000` or in the docker container with:
sudo docker exec -it cass1 cqlsh
Note the `MATERIALZIED VIEWS` and `CUSTOM INDEX` are already created in the [startup script](https://github.com/Miracle-Fruit/kikeriki/blob/main/cassandra/startup/setup/setup_db.sh) and don't needed to be run again.
## Queries
All queries can also be found [here](https://github.com/Miracle-Fruit/kikeriki/tree/main/cassandra/startup/queries).
1. Listing all the posts made by an account.
SELECT content FROM twitter.tweets where author_id = 233248636;
![result_ex1](../query_results/ex1.png)
2. Find the 100 accounts with the most followers
``` python
# 2. Find the 100 accounts with the most followers
rows = session.execute('SELECT user_id,follower_len FROM twitter.most_follows;')
sorted_rows = dict(sorted(dict(rows).items(), key=lambda item: item[1]))
most_follows = list(sorted_rows.keys())[-100:]
print("Top 100 most followed accounts:",most_follows)
```
Top 100 most followed accounts: [817268, 173732041, 14506809, 158804228, 358775055, 45416789, 302282272, 65913144, 261001122, 14246001, 7702232, 101204352, 280365428, 15439395, 26929220, 46537966, 32774989, 2367911, 7429892, 88097807, 274153775, 41147062, 127973392, 10350, 12127832, 18666844, 279787626, 14511951, 116036694, 225784456, 270449528, 63796828, 22784458, 364917755, 15693493, 17346342, 28933226, 7872262, 14180231, 100581193, 88323281, 131029775, 25952851, 14692604, 21681252, 309366491, 25376226, 22705686, 188108667, 18715024, 3829151, 41172837, 92319025, 24081780, 15102849, 83943787, 15680204, 7846, 184097849, 7377812, 204317520, 24641194, 14691709, 112939321, 7081402, 153226312, 14269220, 94414805, 108811740, 1065921, 221829166, 116952434, 15222083, 6608332, 17759701, 16098603, 7860742, 20471349, 6519522, 19040580, 14536491, 197504076, 12611642, 36198161, 3840, 22841103, 440963134, 20273398, 17092592, 13348, 17093617, 22679419, 208132323, 18776017, 15846407, 18581803, 5442012, 813286, 3359851, 59804598]
3. Finding the 100 accounts that follow the most of the accounts found in 2).
```python
followed_accs = session.execute(f'SELECT follower_id FROM twitter.follower_relation WHERE user_id IN {tuple(most_follows)};')
_list = list()
for row in followed_accs:
_list.append(row[0])
followed_top_100 = Counter(_list).most_common(100)
print("100 accounts that follow the most of the accounts found in 2)",followed_top_100)
```
100 accounts that follow the most of the accounts found in 2) [(3359851, 47), (7860742, 47), (15913, 45), (7861312, 45), (16098603, 43), (18776017, 41), (10350, 39), (48485771, 39), (22679419, 38), (3443591, 37), (18581803, 37), (18927441, 37), (26281970, 37), (7872262, 36), (24742040, 36), (5442012, 36), (10671602, 35), (11928542, 35), (14922225, 35), (16453996, 35), (21681252, 35), (14589257, 34), (15853668, 34), (9451052, 34), (65913144, 34), (14269220, 34), (15234657, 34), (17092592, 34), (59804598, 34), (87764480, 34), (40981798, 34), (16475194, 34), (16464746, 33), (93905958, 33), (20152005, 33), (42361118, 33), (36629388, 33), (18666844, 32), (19413393, 32), (12127832, 32), (14180231, 32), (14983833, 32), (25026165, 32), (18742444, 32), (29758446, 31), (20880546, 31), (116952434, 31), (30207757, 31), (21195122, 31), (43003845, 31), (22784458, 30), (19040580, 30), (14691709, 30), (24004172, 30), (1065921, 30), (26280712, 30), (43170475, 30), (22462180, 30), (7846, 29), (1183041, 29), (3040621, 29), (7377812, 29), (34428380, 29), (20273398, 28), (14230524, 28), (15222083, 28), (24641194, 28), (83943787, 28), (36198161, 28), (14536491, 28), (813286, 27), (12611642, 27), (29514951, 27), (31353077, 27), (18996905, 27), (14471778, 27), (15838599, 27), (16112634, 27), (17526132, 27), (17759701, 27), (31331740, 27), (25952851, 27), (84043660, 27), (9431932, 27), (20397258, 27), (127973392, 27), (16674726, 27), (116036694, 27), (972651, 26), (13687132, 26), (4068821, 26), (7702232, 26), (6080022, 26), (17224642, 26), (20935355, 26), (24081780, 26), (26033920, 26), (28933226, 26), (43933017, 26), (101633415, 26)]
4. Listing the information for the personal home page of any account (best try with the accounts found in 2); the start page should contain the following (implement as separate queries):
* the number of followers && the number of followed accounts
SELECT follower_len, follows_len FROM twitter.user_stats WHERE user_id = 233248636; //cheack user_id can be changed
![result_ex4_1](../query_results/ex4_1.png)
* either the 25 newest or the 25 most popular posts of the followed accounts (via DB query)
25 newest
CREATE MATERIALIZED VIEW twitter.start_view_new AS
SELECT user_id_x,follower_id,number_of_likes,number_of_shares,date_time,name,author,content,id FROM twitter.user
WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND date_time IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),date_time,follower_id,id);
SELECT * FROM twitter.start_view_new WHERE user_id_x = 172883064 ORDER BY date_time DESC LIMIT 25;
![result_ex4_2](../query_results/ex4_2_date.png)
25 most popular
CREATE MATERIALIZED VIEW twitter.start_view_like AS
SELECT user_id_x,follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id FROM twitter.user
WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
SELECT * FROM twitter.start_view_like WHERE user_id_x = 172883064 ORDER BY number_of_likes DESC LIMIT 25;
![result_ex4_2](../query_results/ex4_2_likes.png)
5. Caching of the posts for the home page (cf. 4) requires a so-called fan-out in the cache of each follower when writing a new post
CREATE MATERIALIZED VIEW twitter.start_view_user1 AS
SELECT follower_id,number_of_likes,date_time,author,name,content,id FROM twitter.user
WHERE user_id_x IS NOT NULL AND user_id_x=172883064 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
// # order by need partion key in WHERE
SELECT * from twitter.start_view_taylor WHERE user_id_x=172883064 ORDER BY number_of_likes DESC LIMIT 25;
// INSERT new tweet
INSERT INTO twitter.user
(user_id_x,follower_id,name,author ,content ,country ,date_time ,id ,language ,latitude ,longitude ,number_of_likes ,number_of_shares ,user_id_y)
VALUES
(172883064, 233248636, 'NoName','taylorswift12', 'Hallo there BDEA','DE', dateof(now()), 'NoID', 'de', 48, 48, 10000000, 0, 0);
INSERT INTO twitter.tweets(author, content, country, date_time, id, language, latitude, longitude, number_of_likes, number_of_shares ,author_id)
VALUES
('taylorswift12', 'Hallo there BDEA','DE', dateof(now()), 'NoID', 'de', 48, 48, 10000000, 0, 0 '233248636');
![result_ex4_2](../query_results/ex4_2_likes.png)
6. List of the 25 most popular posts that contain a given word (if possible also with AND linking several words)
// To order by the 25 most number_of_like is at our knowledge not possible with the current dataschema
CREATE CUSTOM INDEX search_in ON twitter.tweets (content) USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = { 'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer',
'case_sensitive': 'false' };
SELECT * from twitter.tweets WHERE content LIKE '%world%' limit 25;
![result_ex4_2](../query_results/ex6.png)

10
cqlsh/cqlshrc 100644
View File

@ -0,0 +1,10 @@
;; Setting for cqlsh can be found https://docs.datastax.com/en/dse/5.1/cql/cql/cql_reference/cqlsh_commands/cqlshCqlshrc.html
;; Sample cqlshrc file on GitHub: https://github.com/apache/cassandra/blob/067f4c74612698881feec09039d71d12a5936418/conf/cqlshrc.sample
[ui]
;; Display timezone
timezone = Etc/UTC
;; Used for displaying timestamps (and reading them with COPY)
time_format = %d/%m/%Y %H:%M
[copy]
ESCAPE = \
QUOTE = "

135
docker-compose.yml 100644
View File

@ -0,0 +1,135 @@
# version: "3.9" # optional since v1.27.0
networks:
spaceandtime:
ipam:
config:
- subnet: 172.20.0.0/24 # static ip management required for cassandra web, as this does not work with docker dns
services:
cass1:
image: cassandra:4.0.4 # latest lts version as of 31.05.2022
container_name: cass1
hostname: cass1
healthcheck:
test: ["CMD", "cqlsh", "-e", "describe keyspaces" ]
interval: 10s
timeout: 10s
start_period: 50s
retries: 10
networks:
spaceandtime:
ipv4_address: 172.20.0.6
ports:
- "9042:9042"
volumes:
- ./cqlsh:/root/.cassandra
- ./startup:/tmp/startup
- ./data/cass1:/var/lib/cassandra
- ./etc/cass1:/etc/cassandra # currently not needed as this is configured with the below env variables
environment: &environment
MAX_HEAP_SIZE: 1024M
HEAP_NEWSIZE: 1024M
CASSANDRA_SEEDS: "cass1,cass2"
CASSANDRA_CLUSTER_NAME: SolarSystem
CASSANDRA_DC: Mars
CASSANDRA_RACK: West
CASSANDRA_ENDPOINT_SNITCH: GossipingPropertyFileSnitch
CASSANDRA_NUM_TOKENS: 128
restart: always
cass2:
image: cassandra:4.0.4 # latest lts version as of 31.05.2022
container_name: cass2
hostname: cass2
healthcheck:
test: ["CMD", "cqlsh", "-e", "describe keyspaces" ]
interval: 10s
timeout: 10s
start_period: 50s
retries: 10
networks:
spaceandtime:
ipv4_address: 172.20.0.7
ports:
- "9043:9042"
volumes:
- ./cqlsh:/root/.cassandra
- ./startup:/tmp/startup
- ./data/cass2:/var/lib/cassandra
- ./etc/cass2:/etc/cassandra
environment: *environment
depends_on:
cass1:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
restart: always
cass3:
image: cassandra:4.0.4 # latest lts version as of 31.05.2022
container_name: cass3
hostname: cass3
healthcheck:
test: ["CMD", "cqlsh", "-e", "describe keyspaces" ]
interval: 10s
timeout: 10s
start_period: 50s
retries: 10
networks:
spaceandtime:
ipv4_address: 172.20.0.8
ports:
- "9044:9042"
volumes:
- ./cqlsh:/root/.cassandra
- ./startup:/tmp/startup
- ./data/cass3:/var/lib/cassandra
- ./etc/cass3:/etc/cassandra
environment: *environment
depends_on:
cass2:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
restart: always
cass_startup_client:
image: cassandra:4.0.4 # latest lts version as of 31.05.2022
container_name: cass_startup_client
hostname: cass_startup_client
networks:
spaceandtime:
ipv4_address: 172.20.0.11
volumes:
- ./cqlsh:/root/.cassandra
- ./startup:/tmp/startup
- ./data/cass_startup_client:/var/lib/cassandra
- ./etc/cass_startup_client:/etc/cassandra
environment: *environment
depends_on:
cass1:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
cass2:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
cass3:
condition: service_healthy
command: bash -c "/tmp/startup/setup/setup_db.sh && /tmp/startup/setup/setup_likedFrom.sh"
cassweb:
image: ruby:2.4.1 # latest stable version as of 31.05.2022
container_name: cassweb
hostname: cassweb
command: bash -c "gem install cassandra-web && cassandra-web --hosts '172.20.0.6, 172.20.0.7, 172.20.0.8' --port '9042' --username 'cassandra' --password 'cassandra'"
networks:
spaceandtime:
ipv4_address: 172.20.0.9
ports:
- "3000:3000"
depends_on:
cass1:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
cass2:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
cass3:
condition: service_healthy # service_healthy --> sometimes not working, alternativly restart always policy below
cass_startup_client:
condition: service_started
restart: always

View File

@ -0,0 +1,13 @@
Required configuration files
============================
cassandra.yaml: main Cassandra configuration file
logback.xml: logback configuration file for Cassandra server
Optional configuration files
============================
cassandra-topology.properties: used by PropertyFileSnitch

View File

@ -0,0 +1,307 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
calculate_heap_sizes()
{
case "`uname`" in
Linux)
system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'`
system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo`
;;
FreeBSD)
system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
SunOS)
system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'`
system_cpu_cores=`psrinfo | wc -l`
;;
Darwin)
system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
*)
# assume reasonable defaults for e.g. a modern desktop or
# cheap server
system_memory_in_mb="2048"
system_cpu_cores="2"
;;
esac
# some systems like the raspberry pi don't report cores, use at least 1
if [ "$system_cpu_cores" -lt "1" ]
then
system_cpu_cores="1"
fi
# set max heap size based on the following
# max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# calculate 1/2 ram and cap to 1024MB
# calculate 1/4 ram and cap to 8192MB
# pick the max
half_system_memory_in_mb=`expr $system_memory_in_mb / 2`
quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2`
if [ "$half_system_memory_in_mb" -gt "1024" ]
then
half_system_memory_in_mb="1024"
fi
if [ "$quarter_system_memory_in_mb" -gt "8192" ]
then
quarter_system_memory_in_mb="8192"
fi
if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ]
then
max_heap_size_in_mb="$half_system_memory_in_mb"
else
max_heap_size_in_mb="$quarter_system_memory_in_mb"
fi
MAX_HEAP_SIZE="${max_heap_size_in_mb}M"
# Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size)
max_sensible_yg_per_core_in_mb="100"
max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores`
desired_yg_in_mb=`expr $max_heap_size_in_mb / 4`
if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ]
then
HEAP_NEWSIZE="${max_sensible_yg_in_mb}M"
else
HEAP_NEWSIZE="${desired_yg_in_mb}M"
fi
}
# Sets the path where logback and GC logs are written.
if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then
CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs"
fi
#GC log path has to be defined here because it needs to access CASSANDRA_HOME
if [ $JAVA_VERSION -ge 11 ] ; then
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
echo "$JVM_OPTS" | grep -qe "-[X]log:gc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
fi
else
# Java 8
echo "$JVM_OPTS" | grep -qe "-[X]loggc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log"
fi
fi
# Check what parameters were defined on jvm-server.options file to avoid conflicts
echo $JVM_OPTS | grep -q Xmn
DEFINED_XMN=$?
echo $JVM_OPTS | grep -q Xmx
DEFINED_XMX=$?
echo $JVM_OPTS | grep -q Xms
DEFINED_XMS=$?
echo $JVM_OPTS | grep -q UseConcMarkSweepGC
USING_CMS=$?
echo $JVM_OPTS | grep -q +UseG1GC
USING_G1=$?
# Override these to set the amount of memory to allocate to the JVM at
# start-up. For production use you may wish to adjust this for your
# environment. MAX_HEAP_SIZE is the total amount of memory dedicated
# to the Java heap. HEAP_NEWSIZE refers to the size of the young
# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set
# or not (if you set one, set the other).
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause
# times. If in doubt, and if you do not particularly want to tweak, go with
# 100 MB per physical CPU core.
#MAX_HEAP_SIZE="4G"
#HEAP_NEWSIZE="800M"
# Set this to control the amount of arenas per-thread in glibc
#export MALLOC_ARENA_MAX=4
# only calculate the size if it's not set manually
if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then
calculate_heap_sizes
elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then
echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)"
exit 1
fi
if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then
export MALLOC_ARENA_MAX=4
fi
# We only set -Xms and -Xmx if they were not defined on jvm-server.options file
# If defined, both Xmx and Xms should be defined together.
if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then
JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}"
JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}"
elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then
echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file."
exit 1
fi
# We only set -Xmn flag if it was not defined in jvm-server.options file
# and if the CMS GC is being used
# If defined, both Xmn and Xmx should be defined together.
if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then
echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file."
exit 1
elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}"
fi
# We fail to start if -Xmn is used with G1 GC is being used
# See comments for -Xmn in jvm-server.options
if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then
echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details."
exit 1
fi
if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark"
fi
# provides hints to the JIT compiler
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
# add the jamm javaagent
JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR
if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then
JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof"
fi
# stop the jvm on OutOfMemoryError as it can result in some data corruption
# uncomment the preferred option
# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92
# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words
# on white spaces without taking quotes into account
# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError"
# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError"
JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
# print an heap histogram on OutOfMemoryError
# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
# jmx: metrics and administration interface
#
# add this if you're having trouble connecting:
# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=<public name>"
#
# see
# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole
# for more on configuring JMX through firewalls, etc. (Short version:
# get it working with no firewall first.)
#
# Cassandra ships with JMX accessible *only* from localhost.
# To enable remote JMX connections, uncomment lines below
# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
#
if [ "x$LOCAL_JMX" = "x" ]; then
LOCAL_JMX=yes
fi
# Specifies the default port over which Cassandra will be available for
# JMX connections.
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
JMX_PORT="7199"
if [ "$LOCAL_JMX" = "yes" ]; then
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
else
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT"
# if ssl is enabled the same port cannot be used for both jmx and rmi so either
# pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins)
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT"
# turn on JMX authentication. See below for further options
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"
# jmx ssl options
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=<enabled-protocols>"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=<enabled-cipher-suites>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=<keystore-password>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=<truststore-password>"
fi
# jmx authentication and authorization options. By default, auth is only
# activated for remote connections but they can also be enabled for local only JMX
## Basic file based authn & authz
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities.
## JAAS login modules can be used for authentication by uncommenting these two properties.
## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule -
## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration
## file cassandra-jaas.config
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin"
#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config"
## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer,
## uncomment this to use it. Requires one of the two authentication options to be enabled
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy"
# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/
# directory.
# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx
# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines
# to control its listen address and port.
#MX4J_ADDRESS="127.0.0.1"
#MX4J_PORT="8081"
# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838
# for SIGAR we have to set the java.library.path
# to the location of the native libraries.
JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin"
if [ "x$MX4J_ADDRESS" != "x" ]; then
if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS"
else
JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS"
fi
fi
if [ "x$MX4J_PORT" != "x" ]; then
if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_PORT"
else
JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT"
fi
fi
JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"

View File

@ -0,0 +1,4 @@
// Delegates authentication to Cassandra's configured IAuthenticator
CassandraLogin {
org.apache.cassandra.auth.CassandraLoginModule REQUIRED;
};

View File

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# These properties are used with GossipingPropertyFileSnitch and will
# indicate the rack and dc for this node
dc= Mars
rack= West
# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
# to append a string to the EC2 region name.
#dc_suffix=
# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
# prefer_local=true
# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch.
# Options are:
# legacy : datacenter name is the part of the availability zone name preceding the last "-"
# when the zone ends in -1 and includes the number if not -1. Rack is the portion of
# the availability zone name following the last "-".
# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b;
# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER
# standard : Default value. datacenter name is the standard AWS region name, including the number.
# rack name is the region plus the availability zone letter.
# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b;
# ec2_naming_scheme=standard

View File

@ -0,0 +1,41 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Cassandra Node IP=Data Center:Rack
192.168.1.100=DC1:RAC1
192.168.2.200=DC2:RAC2
10.0.0.10=DC1:RAC1
10.0.0.11=DC1:RAC1
10.0.0.12=DC1:RAC2
10.20.114.10=DC2:RAC1
10.20.114.11=DC2:RAC1
10.21.119.13=DC3:RAC1
10.21.119.10=DC3:RAC1
10.0.0.13=DC1:RAC2
10.21.119.14=DC3:RAC2
10.20.114.15=DC2:RAC2
# default for unknown nodes
default=DC1:r1
# Native IPv6 is supported, however you must escape the colon in the IPv6 Address
# Also be sure to comment out JVM_OPTS="$JVM_OPTS -Djava.net.preferIPv4Stack=true"
# in cassandra-env.sh
fe80\:0\:0\:0\:202\:b3ff\:fe1e\:8329=DC1:RAC3

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# commitlog archiving configuration. Leave blank to disable.
# Command to execute to archive a commitlog segment
# Parameters: %path => Fully qualified path of the segment to archive
# %name => Name of the commit log.
# Example: archive_command=/bin/ln %path /backup/%name
#
# Limitation: *_command= expects one command with arguments. STDOUT
# and STDIN or multiple commands cannot be executed. You might want
# to script multiple commands and add a pointer here.
archive_command=
# Command to execute to make an archived commitlog live again.
# Parameters: %from is the full path to an archived commitlog segment (from restore_directories)
# %to is the live commitlog directory
# Example: restore_command=/bin/cp -f %from %to
restore_command=
# Directory to scan the recovery files in.
restore_directories=
# Restore mutations created up to and including this timestamp in GMT.
# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12)
#
# Recovery will continue through the segment when the first client-supplied
# timestamp greater than this time is encountered, but only mutations less than
# or equal to this timestamp will be applied.
restore_point_in_time=
# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...)
precision=MICROSECONDS

View File

@ -0,0 +1,238 @@
; Licensed to the Apache Software Foundation (ASF) under one
; or more contributor license agreements. See the NOTICE file
; distributed with this work for additional information
; regarding copyright ownership. The ASF licenses this file
; to you under the Apache License, Version 2.0 (the
; "License"); you may not use this file except in compliance
; with the License. You may obtain a copy of the License at
;
; http://www.apache.org/licenses/LICENSE-2.0
;
; Unless required by applicable law or agreed to in writing,
; software distributed under the License is distributed on an
; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
; KIND, either express or implied. See the License for the
; specific language governing permissions and limitations
; under the License.
;
; Sample ~/.cqlshrc file.
[authentication]
;; If Cassandra has auth enabled, fill out these options
; username = fred
; password = !!bang!!$
; keyspace = ks1
[ui]
;; Whether or not to display query results with colors
; color = on
;; Used for displaying timestamps (and reading them with COPY)
; time_format = %Y-%m-%d %H:%M:%S%z
;; Display timezone
;timezone = Etc/UTC
;; The number of digits displayed after the decimal point for single and double precision numbers
;; (note that increasing this to large numbers can result in unusual values)
;float_precision = 5
;double_precision = 12
;; Used for automatic completion and suggestions
; completekey = tab
;; The encoding used for characters
; encoding = utf8
; To use another than the system default browser for cqlsh HELP to open
; the CQL doc HTML, use the 'browser' preference.
; If the field value is empty or not specified, cqlsh will use the
; default browser (specifying 'browser = default' does not work).
;
; Supported browsers are those supported by the Python webbrowser module.
; (https://docs.python.org/3/library/webbrowser.html).
;
; Hint: to use Google Chome, use
; 'browser = open -a /Applications/Google\ Chrome.app %s' on Mac OS X and
; 'browser = /usr/bin/google-chrome-stable %s' on Linux and
; 'browser = C:/Program Files (x86)/Google/Chrome/Application/chrome.exe %s' on Windows.
;
; This setting can be overridden with the --browser command line option.
;
;browser =
[cql]
;; A version of CQL to use (this should almost never be set)
; version = 3.2.1
[connection]
;; The host to connect to
hostname = 127.0.0.1
;; The port to connect to (9042 is the native protocol default)
port = 9042
;; Always connect using SSL - false by default
; ssl = true
;; A timeout in seconds for opening new connections
; timeout = 10
;; A timeout in seconds for executing queries
; request_timeout = 10
[csv]
;; The size limit for parsed fields
; field_size_limit = 131072
[tracing]
;; The max number of seconds to wait for a trace to complete
; max_trace_wait = 10.0
;[ssl]
; certfile = ~/keys/cassandra.cert
;; Optional - true by default.
;validate = true
;; To be provided when require_client_auth=true
;userkey = ~/key.pem
;; To be provided when require_client_auth=true
;usercert = ~/cert.pem
;; Optional section, overrides default certfile in [ssl] section, if present
; [certfiles]
; 192.168.1.3 = ~/keys/cassandra01.cert
; 192.168.1.4 = ~/keys/cassandra02.cert
;; Options that are common to both COPY TO and COPY FROM
; [copy]
;; The string placeholder for null values
; nullval = null
;; For COPY TO, controls whether the first line in the CSV output file will
;; contain the column names. For COPY FROM, specifies whether the first
;; line in the CSV file contains column names.
; header = false
;; The character that is used as the decimal point separator
; decimalsep = .
;; The character that is used to separate thousands
;; (defaults to the empty string)
; thousandssep =
;; The string literal format for boolean values
; boolstyle = True,False
;; The number of child worker processes to create for
;; COPY tasks. Defaults to a max of 4 for COPY FROM and 16
;; for COPY TO. However, at most (num_cores - 1) processes
;; will be created.
; numprocesses =
;; The maximum number of failed attempts to fetch a range of data (when using
;; COPY TO) or insert a chunk of data (when using COPY FROM) before giving up
; maxattempts = 5
;; How often status updates are refreshed, in seconds
; reportfrequency = 0.25
;; An optional file to output rate statistics to
; ratefile =
;; Options specific to COPY TO
; [copy-to]
;; The maximum number token ranges to fetch simultaneously
; maxrequests = 6
;; The number of rows to fetch in a single page
; pagesize = 1000
;; By default the page timeout is 10 seconds per 1000 entries
;; in the page size or 10 seconds if pagesize is smaller
; pagetimeout = 10
;; Token range to export. Defaults to exporting the full ring.
; begintoken =
; endtoken =
; The maximum size of the output file measured in number of lines;
; beyond this maximum the output file will be split into segments.
; -1 means unlimited.
; maxoutputsize = -1
;; The encoding used for characters
; encoding = utf8
;; Options specific to COPY FROM
; [copy-from]
;; The maximum number of rows to process per second
; ingestrate = 100000
;; The maximum number of rows to import (-1 means unlimited)
; maxrows = -1
;; A number of initial rows to skip
; skiprows = 0
;; A comma-separated list of column names to ignore
; skipcols =
;; The maximum global number of parsing errors to ignore, -1 means unlimited
; maxparseerrors = -1
;; The maximum global number of insert errors to ignore, -1 means unlimited
; maxinserterrors = 1000
;; A file to store all rows that could not be imported, by default this is
;; import_<ks>_<table>.err where <ks> is your keyspace and <table> is your table name.
; errfile =
;; The min and max number of rows inserted in a single batch
; maxbatchsize = 20
; minbatchsize = 2
;; The number of rows that are passed to child worker processes from
;; the main process at a time
; chunksize = 1000
;; The options for COPY can also be specified per-table. The following
;; three sections demonstrate this.
;; Optional table-specific options for COPY
; [copy:mykeyspace.mytable]
; chunksize = 1000
;; Optional table-specific options for COPY FROM
; [copy-from:mykeyspace.mytable]
; ingestrate = 20000
;; Optional table-specific options for COPY TO
; [copy-to:mykeyspace.mytable]
; pagetimeout = 30

View File

@ -0,0 +1,32 @@
dontinline org.apache.cassandra.db.Columns$Serializer::deserializeLargeSubset (Lorg.apache.cassandra.io.util.DataInputPlus;Lorg.apache.cassandra.db.Columns;I)Lorg.apache.cassandra.db.Columns;
dontinline org.apache.cassandra.db.Columns$Serializer::serializeLargeSubset (Ljava.util.Collection;ILorg.apache.cassandra.db.Columns;ILorg.apache.cassandra.io.util.DataOutputPlus;)V
dontinline org.apache.cassandra.db.Columns$Serializer::serializeLargeSubsetSize (Ljava.util.Collection;ILorg.apache.cassandra.db.Columns;I)I
dontinline org.apache.cassandra.db.commitlog.AbstractCommitLogSegmentManager::advanceAllocatingFrom (Lorg.apache.cassandra.db.commitlog.CommitLogSegment;)V
dontinline org.apache.cassandra.db.transform.BaseIterator::tryGetMoreContents ()Z
dontinline org.apache.cassandra.db.transform.StoppingTransformation::stop ()V
dontinline org.apache.cassandra.db.transform.StoppingTransformation::stopInPartition ()V
dontinline org.apache.cassandra.io.util.BufferedDataOutputStreamPlus::doFlush (I)V
dontinline org.apache.cassandra.io.util.BufferedDataOutputStreamPlus::writeSlow (JI)V
dontinline org.apache.cassandra.io.util.RebufferingInputStream::readPrimitiveSlowly (I)J
exclude org.apache.cassandra.utils.JVMStabilityInspector::forceHeapSpaceOomMaybe (Ljava.lang.OutOfMemoryError;)V
inline org.apache.cassandra.db.rows.UnfilteredSerializer::serializeRowBody (Lorg.apache.cassandra.db.rows.Row;ILorg.apache.cassandra.db.rows.SerializationHelper;Lorg.apache.cassandra.io.util.DataOutputPlus;)V
inline org.apache.cassandra.io.util.Memory::checkBounds (JJ)V
inline org.apache.cassandra.io.util.SafeMemory::checkBounds (JJ)V
inline org.apache.cassandra.net.FrameDecoderWith8bHeader::decode (Ljava.util.Collection;Lorg.apache.cassandra.net.ShareableBytes;I)V
inline org.apache.cassandra.service.reads.repair.RowIteratorMergeListener::applyToPartition (ILjava.util.function.Consumer;)V
inline org.apache.cassandra.utils.AsymmetricOrdering::selectBoundary (Lorg.apache.cassandra.utils.AsymmetricOrdering.Op;II)I
inline org.apache.cassandra.utils.AsymmetricOrdering::strictnessOfLessThan (Lorg.apache.cassandra.utils.AsymmetricOrdering.Op;)I
inline org.apache.cassandra.utils.BloomFilter::indexes (Lorg.apache.cassandra.utils.IFilter.FilterKey;)[J
inline org.apache.cassandra.utils.BloomFilter::setIndexes (JJIJ[J)V
inline org.apache.cassandra.utils.ByteBufferUtil::compare (Ljava.nio.ByteBuffer;[B)I
inline org.apache.cassandra.utils.ByteBufferUtil::compare ([BLjava.nio.ByteBuffer;)I
inline org.apache.cassandra.utils.ByteBufferUtil::compareUnsigned (Ljava.nio.ByteBuffer;Ljava.nio.ByteBuffer;)I
inline org.apache.cassandra.utils.FastByteOperations$UnsafeOperations::compareTo (Ljava.lang.Object;JILjava.lang.Object;JI)I
inline org.apache.cassandra.utils.FastByteOperations$UnsafeOperations::compareTo (Ljava.lang.Object;JILjava.nio.ByteBuffer;)I
inline org.apache.cassandra.utils.FastByteOperations$UnsafeOperations::compareTo (Ljava.nio.ByteBuffer;Ljava.nio.ByteBuffer;)I
inline org.apache.cassandra.utils.memory.BufferPool$LocalPool::tryGetInternal (IZ)Ljava.nio.ByteBuffer;
inline org.apache.cassandra.utils.vint.VIntCoding::encodeUnsignedVInt (JI)[B
inline org.apache.cassandra.utils.vint.VIntCoding::encodeUnsignedVInt (JI[B)V
inline org.apache.cassandra.utils.vint.VIntCoding::writeUnsignedVInt (JLjava.io.DataOutput;)V
inline org.apache.cassandra.utils.vint.VIntCoding::writeUnsignedVInt (JLjava.nio.ByteBuffer;)V
inline org.apache.cassandra.utils.vint.VIntCoding::writeVInt (JLjava.io.DataOutput;)V

View File

@ -0,0 +1,10 @@
###########################################################################
# jvm-clients.options #
# #
# See jvm8-clients.options and jvm11-clients.options for Java version #
# specific options. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,188 @@
###########################################################################
# jvm-server.options #
# #
# - all flags defined here will be used by cassandra to startup the JVM #
# - one flag should be specified per line #
# - lines that do not start with '-' will be ignored #
# - only static flags are accepted (no variables or parameters) #
# - dynamic flags will be appended to these on cassandra-env #
# #
# See jvm8-server.options and jvm11-server.options for Java version #
# specific options. #
###########################################################################
######################
# STARTUP PARAMETERS #
######################
# Uncomment any of the following properties to enable specific startup parameters
# In a multi-instance deployment, multiple Cassandra instances will independently assume that all
# CPU processors are available to it. This setting allows you to specify a smaller set of processors
# and perhaps have affinity.
#-Dcassandra.available_processors=number_of_processors
# The directory location of the cassandra.yaml file.
#-Dcassandra.config=directory
# Sets the initial partitioner token for a node the first time the node is started.
#-Dcassandra.initial_token=token
# Set to false to start Cassandra on a node but not have the node join the cluster.
#-Dcassandra.join_ring=true|false
# Set to false to clear all gossip state for the node on restart. Use when you have changed node
# information in cassandra.yaml (such as listen_address).
#-Dcassandra.load_ring_state=true|false
# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2.
#-Dcassandra.metricsReporterConfigFile=file
# Set the port on which the CQL native transport listens for clients. (Default: 9042)
#-Dcassandra.native_transport_port=port
# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner)
#-Dcassandra.partitioner=partitioner
# To replace a node that has died, restart a new node in its place specifying the address of the
# dead node. The new node must not have any data in its data directory, that is, it must be in the
# same state as before bootstrapping.
#-Dcassandra.replace_address=listen_address or broadcast_address of dead node
# Allow restoring specific tables from an archived commit log.
#-Dcassandra.replayList=table
# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits
# before joining the ring.
#-Dcassandra.ring_delay_ms=ms
# Set the SSL port for encrypted communication. (Default: 7001)
#-Dcassandra.ssl_storage_port=port
# Set the port for inter-node communication. (Default: 7000)
#-Dcassandra.storage_port=port
# Set the default location for the trigger JARs. (Default: conf/triggers)
#-Dcassandra.triggers_dir=directory
# For testing new compaction and compression strategies. It allows you to experiment with different
# strategies and benchmark write performance differences without affecting the production workload.
#-Dcassandra.write_survey=true
# To disable configuration via JMX of auth caches (such as those for credentials, permissions and
# roles). This will mean those config options can only be set (persistently) in cassandra.yaml
# and will require a restart for new values to take effect.
#-Dcassandra.disable_auth_caches_remote_configuration=true
# To disable dynamic calculation of the page size used when indexing an entire partition (during
# initial index build/rebuild). If set to true, the page size will be fixed to the default of
# 10000 rows per page.
#-Dcassandra.force_default_indexing_page_size=true
# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds
#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds
########################
# GENERAL JVM SETTINGS #
########################
# enable assertions. highly suggested for correct application functionality.
-ea
# disable assertions for net.openhft.** because it runs out of memory by design
# if enabled and run for more than just brief testing
-da:net.openhft...
# enable thread priorities, primarily so we can give periodic tasks
# a lower priority to avoid interfering with client workload
-XX:+UseThreadPriorities
# Enable heap-dump if there's an OOM
-XX:+HeapDumpOnOutOfMemoryError
# Per-thread stack size.
-Xss256k
# Make sure all memory is faulted and zeroed on startup.
# This helps prevent soft faults in containers and makes
# transparent hugepage allocation more effective.
-XX:+AlwaysPreTouch
# Disable biased locking as it does not benefit Cassandra.
-XX:-UseBiasedLocking
# Enable thread-local allocation blocks and allow the JVM to automatically
# resize them at runtime.
-XX:+UseTLAB
-XX:+ResizeTLAB
-XX:+UseNUMA
# http://www.evanjones.ca/jvm-mmap-pause.html
-XX:+PerfDisableSharedMem
# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See
# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
# comment out this entry to enable IPv6 support).
-Djava.net.preferIPv4Stack=true
### Debug options
# uncomment to enable flight recorder
#-XX:+UnlockCommercialFeatures
#-XX:+FlightRecorder
# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414
# uncomment to have Cassandra JVM log internal method compilation (developers only)
#-XX:+UnlockDiagnosticVMOptions
#-XX:+LogCompilation
#################
# HEAP SETTINGS #
#################
# Heap size is automatically calculated by cassandra-env based on this
# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# That is:
# - calculate 1/2 ram and cap to 1024MB
# - calculate 1/4 ram and cap to 8192MB
# - pick the max
#
# For production use you may wish to adjust this for your environment.
# If that's the case, uncomment the -Xmx and Xms options below to override the
# automatic calculation of JVM heap memory.
#
# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to
# the same value to avoid stop-the-world GC pauses during resize, and
# so that we can lock the heap in memory on startup to prevent any
# of it from being swapped out.
#-Xms4G
#-Xmx4G
# Young generation size is automatically calculated by cassandra-env
# based on this formula: min(100 * num_cores, 1/4 * heap size)
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# It is not recommended to set the young generation size if using the
# G1 GC, since that will override the target pause-time goal.
# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
#
# The example below assumes a modern 8-core+ machine for decent
# times. If in doubt, and if you do not particularly want to tweak, go
# 100 MB per physical CPU core.
#-Xmn800M
###################################
# EXPIRATION DATE OVERFLOW POLICY #
###################################
# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date:
# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00.
# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning.
# * CAP_NOWARN: same as previous, except that the client warning will not be emitted.
#
#-Dcassandra.expiration_date_overflow_policy=REJECT

View File

@ -0,0 +1,29 @@
###########################################################################
# jvm11-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 11 and newer. #
###########################################################################
###################
# JPMS SETTINGS #
###################
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
# The newline in the end of file is intentional

View File

@ -0,0 +1,103 @@
###########################################################################
# jvm11-server.options #
# #
# See jvm-server.options. This file is specific for Java 11 and newer. #
###########################################################################
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### JPMS
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
### GC logging options -- uncomment to enable
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc"
# -XX:+PrintGCDateStamps maps to decorator 'time'
#
# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace'
# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug'
# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info'
# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace'
# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace'
### Netty Options
# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable
# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has
# inferior performance and risks exceeding MaxDirectMemory
-Dio.netty.tryReflectionSetAccessible=true
# The newline in the end of file is intentional

View File

@ -0,0 +1,9 @@
###########################################################################
# jvm8-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 8 and newer. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,76 @@
###########################################################################
# jvm8-server.options #
# #
# See jvm-server.options. This file is specific for Java 8 and newer. #
###########################################################################
########################
# GENERAL JVM SETTINGS #
########################
# allows lowering thread priority without being root on linux - probably
# not necessary on Windows but doesn't harm anything.
# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html
-XX:ThreadPriorityPolicy=42
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseParNewGC
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### GC logging options -- uncomment to enable
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-XX:+PrintHeapAtGC
-XX:+PrintTenuringDistribution
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintPromotionFailure
#-XX:PrintFLSStatistics=1
#-Xloggc:/var/log/cassandra/gc.log
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=10M
# The newline in the end of file is intentional

View File

@ -0,0 +1,34 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration>
<appender name="STDERR" class="ch.qos.logback.core.ConsoleAppender">
<target>System.err</target>
<encoder>
<pattern>%-5level %date{"HH:mm:ss,SSS"} %msg%n</pattern>
</encoder>
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>WARN</level>
</filter>
</appender>
<root level="WARN">
<appender-ref ref="STDERR" />
</root>
</configuration>

View File

@ -0,0 +1,124 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
In order to disable debug.log, comment-out the ASYNCDEBUGLOG
appender reference in the root level section below.
-->
<configuration scan="true" scanPeriod="60 seconds">
<jmxConfigurator />
<!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
<!-- SYSTEMLOG rolling file appender to system.log (INFO level) -->
<appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<file>${cassandra.logdir}/system.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
<appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/debug.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- ASYNCLOG assynchronous appender to debug.log (all levels) -->
<appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>true</includeCallerData>
<appender-ref ref="DEBUGLOG" />
</appender>
<!-- STDOUT console appender to stdout (INFO level) -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- Uncomment below configuration (Audit Logging (FileAuditLogger) rolling file appender and Audit Logging
additivity) in order to have the log events flow through separate log file instead of system.log.
Audit Logging (FileAuditLogger) rolling file appender to audit.log -->
<!-- <appender name="AUDIT" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/audit/audit.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> -->
<!-- rollover daily -->
<!-- <fileNamePattern>${cassandra.logdir}/audit/audit.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern> -->
<!-- each file should be at most 50MB, keep 30 days worth of history, but at most 5GB -->
<!-- <maxFileSize>50MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender> -->
<!-- Audit Logging additivity to redirect audit logging events to audit/audit.log -->
<!-- <logger name="org.apache.cassandra.audit" additivity="false" level="INFO">
<appender-ref ref="AUDIT"/>
</logger> -->
<!-- Uncomment bellow and corresponding appender-ref to activate logback metrics
<appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
-->
<root level="INFO">
<appender-ref ref="SYSTEMLOG" />
<appender-ref ref="STDOUT" />
<appender-ref ref="ASYNCDEBUGLOG" /> <!-- Comment this line to disable debug.log -->
<!--
<appender-ref ref="LogbackMetrics" />
-->
</root>
<logger name="org.apache.cassandra" level="DEBUG"/>
</configuration>

View File

@ -0,0 +1,41 @@
# For details see:
# * http://wiki.apache.org/cassandra/Metrics
# * https://github.com/addthis/metrics-reporter-config
# This is an example file for configuring which metrics should go
# where. The sample sends everything to a flat file for humans to
# poke at. metrics-ganglia or metrics-graphite are more likely to
# operationally useful.
# Some metrics are global for a node (KeyCache capacity) while others
# are broken down by column family or even IP. The sample list
# includes all of the global metrics via a while list. To include
# metrics for the system column family for example add
# "^org.apache.cassandra.metrics.ColumnFamily.system.+".
# Start Cassandra with
# -Dcassandra.metricsReporterConfigFile=metrics-reporter-config.yaml
# for this file to be used. If you are using metrics-ganglia,
# metrics-graphite, or a custom reporter you will also have to add those
# jars to the lib directory. Nothing in this file can affect
# jmx metrics.
console:
-
outfile: '/tmp/metrics.out'
period: 10
timeunit: 'SECONDS'
predicate:
color: "white"
useQualifiedName: true
patterns:
- "^org.apache.cassandra.metrics.Cache.+"
- "^org.apache.cassandra.metrics.ClientRequest.+" # includes ClientRequestMetrics
- "^org.apache.cassandra.metrics.CommitLog.+"
- "^org.apache.cassandra.metrics.Compaction.+"
- "^org.apache.cassandra.metrics.DroppedMessage.+"
- "^org.apache.cassandra.metrics.ReadRepair.+"
- "^org.apache.cassandra.metrics.Storage.+"
- "^org.apache.cassandra.metrics.ThreadPools.+"

View File

@ -0,0 +1 @@
Place triggers to be loaded in this directory, as jar files.

11
etc/cass1/.gitignore vendored 100644
View File

@ -0,0 +1,11 @@
# ignore everything
#*
# except
# .gitignore
# !cassandra.yaml
# !jvm*
# !logback.xml
# !commitlog_archiving.properties
# !cassandra-rackdc.properties
# !cassandra-env.sh

View File

@ -0,0 +1,307 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
calculate_heap_sizes()
{
case "`uname`" in
Linux)
system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'`
system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo`
;;
FreeBSD)
system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
SunOS)
system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'`
system_cpu_cores=`psrinfo | wc -l`
;;
Darwin)
system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
*)
# assume reasonable defaults for e.g. a modern desktop or
# cheap server
system_memory_in_mb="2048"
system_cpu_cores="2"
;;
esac
# some systems like the raspberry pi don't report cores, use at least 1
if [ "$system_cpu_cores" -lt "1" ]
then
system_cpu_cores="1"
fi
# set max heap size based on the following
# max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# calculate 1/2 ram and cap to 1024MB
# calculate 1/4 ram and cap to 8192MB
# pick the max
half_system_memory_in_mb=`expr $system_memory_in_mb / 2`
quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2`
if [ "$half_system_memory_in_mb" -gt "1024" ]
then
half_system_memory_in_mb="1024"
fi
if [ "$quarter_system_memory_in_mb" -gt "8192" ]
then
quarter_system_memory_in_mb="8192"
fi
if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ]
then
max_heap_size_in_mb="$half_system_memory_in_mb"
else
max_heap_size_in_mb="$quarter_system_memory_in_mb"
fi
MAX_HEAP_SIZE="${max_heap_size_in_mb}M"
# Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size)
max_sensible_yg_per_core_in_mb="100"
max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores`
desired_yg_in_mb=`expr $max_heap_size_in_mb / 4`
if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ]
then
HEAP_NEWSIZE="${max_sensible_yg_in_mb}M"
else
HEAP_NEWSIZE="${desired_yg_in_mb}M"
fi
}
# Sets the path where logback and GC logs are written.
if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then
CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs"
fi
#GC log path has to be defined here because it needs to access CASSANDRA_HOME
if [ $JAVA_VERSION -ge 11 ] ; then
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
echo "$JVM_OPTS" | grep -qe "-[X]log:gc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
fi
else
# Java 8
echo "$JVM_OPTS" | grep -qe "-[X]loggc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log"
fi
fi
# Check what parameters were defined on jvm-server.options file to avoid conflicts
echo $JVM_OPTS | grep -q Xmn
DEFINED_XMN=$?
echo $JVM_OPTS | grep -q Xmx
DEFINED_XMX=$?
echo $JVM_OPTS | grep -q Xms
DEFINED_XMS=$?
echo $JVM_OPTS | grep -q UseConcMarkSweepGC
USING_CMS=$?
echo $JVM_OPTS | grep -q +UseG1GC
USING_G1=$?
# Override these to set the amount of memory to allocate to the JVM at
# start-up. For production use you may wish to adjust this for your
# environment. MAX_HEAP_SIZE is the total amount of memory dedicated
# to the Java heap. HEAP_NEWSIZE refers to the size of the young
# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set
# or not (if you set one, set the other).
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause
# times. If in doubt, and if you do not particularly want to tweak, go with
# 100 MB per physical CPU core.
#MAX_HEAP_SIZE="4G"
#HEAP_NEWSIZE="800M"
# Set this to control the amount of arenas per-thread in glibc
#export MALLOC_ARENA_MAX=4
# only calculate the size if it's not set manually
if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then
calculate_heap_sizes
elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then
echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)"
exit 1
fi
if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then
export MALLOC_ARENA_MAX=4
fi
# We only set -Xms and -Xmx if they were not defined on jvm-server.options file
# If defined, both Xmx and Xms should be defined together.
if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then
JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}"
JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}"
elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then
echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file."
exit 1
fi
# We only set -Xmn flag if it was not defined in jvm-server.options file
# and if the CMS GC is being used
# If defined, both Xmn and Xmx should be defined together.
if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then
echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file."
exit 1
elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}"
fi
# We fail to start if -Xmn is used with G1 GC is being used
# See comments for -Xmn in jvm-server.options
if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then
echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details."
exit 1
fi
if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark"
fi
# provides hints to the JIT compiler
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
# add the jamm javaagent
JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR
if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then
JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof"
fi
# stop the jvm on OutOfMemoryError as it can result in some data corruption
# uncomment the preferred option
# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92
# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words
# on white spaces without taking quotes into account
# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError"
# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError"
JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
# print an heap histogram on OutOfMemoryError
# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
# jmx: metrics and administration interface
#
# add this if you're having trouble connecting:
# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=<public name>"
#
# see
# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole
# for more on configuring JMX through firewalls, etc. (Short version:
# get it working with no firewall first.)
#
# Cassandra ships with JMX accessible *only* from localhost.
# To enable remote JMX connections, uncomment lines below
# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
#
if [ "x$LOCAL_JMX" = "x" ]; then
LOCAL_JMX=yes
fi
# Specifies the default port over which Cassandra will be available for
# JMX connections.
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
JMX_PORT="7199"
if [ "$LOCAL_JMX" = "yes" ]; then
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
else
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT"
# if ssl is enabled the same port cannot be used for both jmx and rmi so either
# pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins)
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT"
# turn on JMX authentication. See below for further options
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"
# jmx ssl options
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=<enabled-protocols>"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=<enabled-cipher-suites>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=<keystore-password>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=<truststore-password>"
fi
# jmx authentication and authorization options. By default, auth is only
# activated for remote connections but they can also be enabled for local only JMX
## Basic file based authn & authz
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities.
## JAAS login modules can be used for authentication by uncommenting these two properties.
## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule -
## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration
## file cassandra-jaas.config
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin"
#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config"
## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer,
## uncomment this to use it. Requires one of the two authentication options to be enabled
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy"
# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/
# directory.
# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx
# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines
# to control its listen address and port.
#MX4J_ADDRESS="127.0.0.1"
#MX4J_PORT="8081"
# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838
# for SIGAR we have to set the java.library.path
# to the location of the native libraries.
JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin"
if [ "x$MX4J_ADDRESS" != "x" ]; then
if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS"
else
JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS"
fi
fi
if [ "x$MX4J_PORT" != "x" ]; then
if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_PORT"
else
JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT"
fi
fi
JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"

View File

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# These properties are used with GossipingPropertyFileSnitch and will
# indicate the rack and dc for this node
dc= Mars
rack= West
# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
# to append a string to the EC2 region name.
#dc_suffix=
# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
# prefer_local=true
# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch.
# Options are:
# legacy : datacenter name is the part of the availability zone name preceding the last "-"
# when the zone ends in -1 and includes the number if not -1. Rack is the portion of
# the availability zone name following the last "-".
# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b;
# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER
# standard : Default value. datacenter name is the standard AWS region name, including the number.
# rack name is the region plus the availability zone letter.
# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b;
# ec2_naming_scheme=standard

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# commitlog archiving configuration. Leave blank to disable.
# Command to execute to archive a commitlog segment
# Parameters: %path => Fully qualified path of the segment to archive
# %name => Name of the commit log.
# Example: archive_command=/bin/ln %path /backup/%name
#
# Limitation: *_command= expects one command with arguments. STDOUT
# and STDIN or multiple commands cannot be executed. You might want
# to script multiple commands and add a pointer here.
archive_command=
# Command to execute to make an archived commitlog live again.
# Parameters: %from is the full path to an archived commitlog segment (from restore_directories)
# %to is the live commitlog directory
# Example: restore_command=/bin/cp -f %from %to
restore_command=
# Directory to scan the recovery files in.
restore_directories=
# Restore mutations created up to and including this timestamp in GMT.
# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12)
#
# Recovery will continue through the segment when the first client-supplied
# timestamp greater than this time is encountered, but only mutations less than
# or equal to this timestamp will be applied.
restore_point_in_time=
# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...)
precision=MICROSECONDS

View File

@ -0,0 +1,10 @@
###########################################################################
# jvm-clients.options #
# #
# See jvm8-clients.options and jvm11-clients.options for Java version #
# specific options. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,188 @@
###########################################################################
# jvm-server.options #
# #
# - all flags defined here will be used by cassandra to startup the JVM #
# - one flag should be specified per line #
# - lines that do not start with '-' will be ignored #
# - only static flags are accepted (no variables or parameters) #
# - dynamic flags will be appended to these on cassandra-env #
# #
# See jvm8-server.options and jvm11-server.options for Java version #
# specific options. #
###########################################################################
######################
# STARTUP PARAMETERS #
######################
# Uncomment any of the following properties to enable specific startup parameters
# In a multi-instance deployment, multiple Cassandra instances will independently assume that all
# CPU processors are available to it. This setting allows you to specify a smaller set of processors
# and perhaps have affinity.
#-Dcassandra.available_processors=number_of_processors
# The directory location of the cassandra.yaml file.
#-Dcassandra.config=directory
# Sets the initial partitioner token for a node the first time the node is started.
#-Dcassandra.initial_token=token
# Set to false to start Cassandra on a node but not have the node join the cluster.
#-Dcassandra.join_ring=true|false
# Set to false to clear all gossip state for the node on restart. Use when you have changed node
# information in cassandra.yaml (such as listen_address).
#-Dcassandra.load_ring_state=true|false
# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2.
#-Dcassandra.metricsReporterConfigFile=file
# Set the port on which the CQL native transport listens for clients. (Default: 9042)
#-Dcassandra.native_transport_port=port
# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner)
#-Dcassandra.partitioner=partitioner
# To replace a node that has died, restart a new node in its place specifying the address of the
# dead node. The new node must not have any data in its data directory, that is, it must be in the
# same state as before bootstrapping.
#-Dcassandra.replace_address=listen_address or broadcast_address of dead node
# Allow restoring specific tables from an archived commit log.
#-Dcassandra.replayList=table
# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits
# before joining the ring.
#-Dcassandra.ring_delay_ms=ms
# Set the SSL port for encrypted communication. (Default: 7001)
#-Dcassandra.ssl_storage_port=port
# Set the port for inter-node communication. (Default: 7000)
#-Dcassandra.storage_port=port
# Set the default location for the trigger JARs. (Default: conf/triggers)
#-Dcassandra.triggers_dir=directory
# For testing new compaction and compression strategies. It allows you to experiment with different
# strategies and benchmark write performance differences without affecting the production workload.
#-Dcassandra.write_survey=true
# To disable configuration via JMX of auth caches (such as those for credentials, permissions and
# roles). This will mean those config options can only be set (persistently) in cassandra.yaml
# and will require a restart for new values to take effect.
#-Dcassandra.disable_auth_caches_remote_configuration=true
# To disable dynamic calculation of the page size used when indexing an entire partition (during
# initial index build/rebuild). If set to true, the page size will be fixed to the default of
# 10000 rows per page.
#-Dcassandra.force_default_indexing_page_size=true
# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds
#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds
########################
# GENERAL JVM SETTINGS #
########################
# enable assertions. highly suggested for correct application functionality.
-ea
# disable assertions for net.openhft.** because it runs out of memory by design
# if enabled and run for more than just brief testing
-da:net.openhft...
# enable thread priorities, primarily so we can give periodic tasks
# a lower priority to avoid interfering with client workload
-XX:+UseThreadPriorities
# Enable heap-dump if there's an OOM
-XX:+HeapDumpOnOutOfMemoryError
# Per-thread stack size.
-Xss256k
# Make sure all memory is faulted and zeroed on startup.
# This helps prevent soft faults in containers and makes
# transparent hugepage allocation more effective.
-XX:+AlwaysPreTouch
# Disable biased locking as it does not benefit Cassandra.
-XX:-UseBiasedLocking
# Enable thread-local allocation blocks and allow the JVM to automatically
# resize them at runtime.
-XX:+UseTLAB
-XX:+ResizeTLAB
-XX:+UseNUMA
# http://www.evanjones.ca/jvm-mmap-pause.html
-XX:+PerfDisableSharedMem
# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See
# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
# comment out this entry to enable IPv6 support).
-Djava.net.preferIPv4Stack=true
### Debug options
# uncomment to enable flight recorder
#-XX:+UnlockCommercialFeatures
#-XX:+FlightRecorder
# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414
# uncomment to have Cassandra JVM log internal method compilation (developers only)
#-XX:+UnlockDiagnosticVMOptions
#-XX:+LogCompilation
#################
# HEAP SETTINGS #
#################
# Heap size is automatically calculated by cassandra-env based on this
# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# That is:
# - calculate 1/2 ram and cap to 1024MB
# - calculate 1/4 ram and cap to 8192MB
# - pick the max
#
# For production use you may wish to adjust this for your environment.
# If that's the case, uncomment the -Xmx and Xms options below to override the
# automatic calculation of JVM heap memory.
#
# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to
# the same value to avoid stop-the-world GC pauses during resize, and
# so that we can lock the heap in memory on startup to prevent any
# of it from being swapped out.
#-Xms4G
#-Xmx4G
# Young generation size is automatically calculated by cassandra-env
# based on this formula: min(100 * num_cores, 1/4 * heap size)
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# It is not recommended to set the young generation size if using the
# G1 GC, since that will override the target pause-time goal.
# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
#
# The example below assumes a modern 8-core+ machine for decent
# times. If in doubt, and if you do not particularly want to tweak, go
# 100 MB per physical CPU core.
#-Xmn800M
###################################
# EXPIRATION DATE OVERFLOW POLICY #
###################################
# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date:
# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00.
# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning.
# * CAP_NOWARN: same as previous, except that the client warning will not be emitted.
#
#-Dcassandra.expiration_date_overflow_policy=REJECT

View File

@ -0,0 +1,29 @@
###########################################################################
# jvm11-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 11 and newer. #
###########################################################################
###################
# JPMS SETTINGS #
###################
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
# The newline in the end of file is intentional

View File

@ -0,0 +1,103 @@
###########################################################################
# jvm11-server.options #
# #
# See jvm-server.options. This file is specific for Java 11 and newer. #
###########################################################################
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### JPMS
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
### GC logging options -- uncomment to enable
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc"
# -XX:+PrintGCDateStamps maps to decorator 'time'
#
# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace'
# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug'
# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info'
# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace'
# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace'
### Netty Options
# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable
# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has
# inferior performance and risks exceeding MaxDirectMemory
-Dio.netty.tryReflectionSetAccessible=true
# The newline in the end of file is intentional

View File

@ -0,0 +1,9 @@
###########################################################################
# jvm8-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 8 and newer. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,76 @@
###########################################################################
# jvm8-server.options #
# #
# See jvm-server.options. This file is specific for Java 8 and newer. #
###########################################################################
########################
# GENERAL JVM SETTINGS #
########################
# allows lowering thread priority without being root on linux - probably
# not necessary on Windows but doesn't harm anything.
# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html
-XX:ThreadPriorityPolicy=42
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseParNewGC
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### GC logging options -- uncomment to enable
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-XX:+PrintHeapAtGC
-XX:+PrintTenuringDistribution
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintPromotionFailure
#-XX:PrintFLSStatistics=1
#-Xloggc:/var/log/cassandra/gc.log
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=10M
# The newline in the end of file is intentional

View File

@ -0,0 +1,124 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
In order to disable debug.log, comment-out the ASYNCDEBUGLOG
appender reference in the root level section below.
-->
<configuration scan="true" scanPeriod="60 seconds">
<jmxConfigurator />
<!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
<!-- SYSTEMLOG rolling file appender to system.log (INFO level) -->
<appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<file>${cassandra.logdir}/system.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
<appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/debug.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- ASYNCLOG assynchronous appender to debug.log (all levels) -->
<appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>true</includeCallerData>
<appender-ref ref="DEBUGLOG" />
</appender>
<!-- STDOUT console appender to stdout (INFO level) -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- Uncomment below configuration (Audit Logging (FileAuditLogger) rolling file appender and Audit Logging
additivity) in order to have the log events flow through separate log file instead of system.log.
Audit Logging (FileAuditLogger) rolling file appender to audit.log -->
<!-- <appender name="AUDIT" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/audit/audit.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> -->
<!-- rollover daily -->
<!-- <fileNamePattern>${cassandra.logdir}/audit/audit.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern> -->
<!-- each file should be at most 50MB, keep 30 days worth of history, but at most 5GB -->
<!-- <maxFileSize>50MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender> -->
<!-- Audit Logging additivity to redirect audit logging events to audit/audit.log -->
<!-- <logger name="org.apache.cassandra.audit" additivity="false" level="INFO">
<appender-ref ref="AUDIT"/>
</logger> -->
<!-- Uncomment bellow and corresponding appender-ref to activate logback metrics
<appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
-->
<root level="INFO">
<appender-ref ref="SYSTEMLOG" />
<appender-ref ref="STDOUT" />
<appender-ref ref="ASYNCDEBUGLOG" /> <!-- Comment this line to disable debug.log -->
<!--
<appender-ref ref="LogbackMetrics" />
-->
</root>
<logger name="org.apache.cassandra" level="DEBUG"/>
</configuration>

11
etc/cass2/.gitignore vendored 100644
View File

@ -0,0 +1,11 @@
# ignore everything
#*
# except
# .gitignore
# !cassandra.yaml
# !jvm*
# !logback.xml
# !commitlog_archiving.properties
# !cassandra-rackdc.properties
# !cassandra-env.sh

View File

@ -0,0 +1,307 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
calculate_heap_sizes()
{
case "`uname`" in
Linux)
system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'`
system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo`
;;
FreeBSD)
system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
SunOS)
system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'`
system_cpu_cores=`psrinfo | wc -l`
;;
Darwin)
system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
*)
# assume reasonable defaults for e.g. a modern desktop or
# cheap server
system_memory_in_mb="2048"
system_cpu_cores="2"
;;
esac
# some systems like the raspberry pi don't report cores, use at least 1
if [ "$system_cpu_cores" -lt "1" ]
then
system_cpu_cores="1"
fi
# set max heap size based on the following
# max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# calculate 1/2 ram and cap to 1024MB
# calculate 1/4 ram and cap to 8192MB
# pick the max
half_system_memory_in_mb=`expr $system_memory_in_mb / 2`
quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2`
if [ "$half_system_memory_in_mb" -gt "1024" ]
then
half_system_memory_in_mb="1024"
fi
if [ "$quarter_system_memory_in_mb" -gt "8192" ]
then
quarter_system_memory_in_mb="8192"
fi
if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ]
then
max_heap_size_in_mb="$half_system_memory_in_mb"
else
max_heap_size_in_mb="$quarter_system_memory_in_mb"
fi
MAX_HEAP_SIZE="${max_heap_size_in_mb}M"
# Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size)
max_sensible_yg_per_core_in_mb="100"
max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores`
desired_yg_in_mb=`expr $max_heap_size_in_mb / 4`
if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ]
then
HEAP_NEWSIZE="${max_sensible_yg_in_mb}M"
else
HEAP_NEWSIZE="${desired_yg_in_mb}M"
fi
}
# Sets the path where logback and GC logs are written.
if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then
CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs"
fi
#GC log path has to be defined here because it needs to access CASSANDRA_HOME
if [ $JAVA_VERSION -ge 11 ] ; then
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
echo "$JVM_OPTS" | grep -qe "-[X]log:gc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
fi
else
# Java 8
echo "$JVM_OPTS" | grep -qe "-[X]loggc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log"
fi
fi
# Check what parameters were defined on jvm-server.options file to avoid conflicts
echo $JVM_OPTS | grep -q Xmn
DEFINED_XMN=$?
echo $JVM_OPTS | grep -q Xmx
DEFINED_XMX=$?
echo $JVM_OPTS | grep -q Xms
DEFINED_XMS=$?
echo $JVM_OPTS | grep -q UseConcMarkSweepGC
USING_CMS=$?
echo $JVM_OPTS | grep -q +UseG1GC
USING_G1=$?
# Override these to set the amount of memory to allocate to the JVM at
# start-up. For production use you may wish to adjust this for your
# environment. MAX_HEAP_SIZE is the total amount of memory dedicated
# to the Java heap. HEAP_NEWSIZE refers to the size of the young
# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set
# or not (if you set one, set the other).
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause
# times. If in doubt, and if you do not particularly want to tweak, go with
# 100 MB per physical CPU core.
#MAX_HEAP_SIZE="4G"
#HEAP_NEWSIZE="800M"
# Set this to control the amount of arenas per-thread in glibc
#export MALLOC_ARENA_MAX=4
# only calculate the size if it's not set manually
if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then
calculate_heap_sizes
elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then
echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)"
exit 1
fi
if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then
export MALLOC_ARENA_MAX=4
fi
# We only set -Xms and -Xmx if they were not defined on jvm-server.options file
# If defined, both Xmx and Xms should be defined together.
if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then
JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}"
JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}"
elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then
echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file."
exit 1
fi
# We only set -Xmn flag if it was not defined in jvm-server.options file
# and if the CMS GC is being used
# If defined, both Xmn and Xmx should be defined together.
if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then
echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file."
exit 1
elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}"
fi
# We fail to start if -Xmn is used with G1 GC is being used
# See comments for -Xmn in jvm-server.options
if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then
echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details."
exit 1
fi
if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark"
fi
# provides hints to the JIT compiler
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
# add the jamm javaagent
JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR
if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then
JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof"
fi
# stop the jvm on OutOfMemoryError as it can result in some data corruption
# uncomment the preferred option
# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92
# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words
# on white spaces without taking quotes into account
# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError"
# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError"
JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
# print an heap histogram on OutOfMemoryError
# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
# jmx: metrics and administration interface
#
# add this if you're having trouble connecting:
# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=<public name>"
#
# see
# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole
# for more on configuring JMX through firewalls, etc. (Short version:
# get it working with no firewall first.)
#
# Cassandra ships with JMX accessible *only* from localhost.
# To enable remote JMX connections, uncomment lines below
# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
#
if [ "x$LOCAL_JMX" = "x" ]; then
LOCAL_JMX=yes
fi
# Specifies the default port over which Cassandra will be available for
# JMX connections.
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
JMX_PORT="7199"
if [ "$LOCAL_JMX" = "yes" ]; then
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
else
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT"
# if ssl is enabled the same port cannot be used for both jmx and rmi so either
# pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins)
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT"
# turn on JMX authentication. See below for further options
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"
# jmx ssl options
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=<enabled-protocols>"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=<enabled-cipher-suites>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=<keystore-password>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=<truststore-password>"
fi
# jmx authentication and authorization options. By default, auth is only
# activated for remote connections but they can also be enabled for local only JMX
## Basic file based authn & authz
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities.
## JAAS login modules can be used for authentication by uncommenting these two properties.
## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule -
## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration
## file cassandra-jaas.config
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin"
#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config"
## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer,
## uncomment this to use it. Requires one of the two authentication options to be enabled
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy"
# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/
# directory.
# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx
# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines
# to control its listen address and port.
#MX4J_ADDRESS="127.0.0.1"
#MX4J_PORT="8081"
# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838
# for SIGAR we have to set the java.library.path
# to the location of the native libraries.
JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin"
if [ "x$MX4J_ADDRESS" != "x" ]; then
if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS"
else
JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS"
fi
fi
if [ "x$MX4J_PORT" != "x" ]; then
if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_PORT"
else
JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT"
fi
fi
JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"

View File

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# These properties are used with GossipingPropertyFileSnitch and will
# indicate the rack and dc for this node
dc= Mars
rack= West
# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
# to append a string to the EC2 region name.
#dc_suffix=
# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
# prefer_local=true
# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch.
# Options are:
# legacy : datacenter name is the part of the availability zone name preceding the last "-"
# when the zone ends in -1 and includes the number if not -1. Rack is the portion of
# the availability zone name following the last "-".
# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b;
# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER
# standard : Default value. datacenter name is the standard AWS region name, including the number.
# rack name is the region plus the availability zone letter.
# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b;
# ec2_naming_scheme=standard

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# commitlog archiving configuration. Leave blank to disable.
# Command to execute to archive a commitlog segment
# Parameters: %path => Fully qualified path of the segment to archive
# %name => Name of the commit log.
# Example: archive_command=/bin/ln %path /backup/%name
#
# Limitation: *_command= expects one command with arguments. STDOUT
# and STDIN or multiple commands cannot be executed. You might want
# to script multiple commands and add a pointer here.
archive_command=
# Command to execute to make an archived commitlog live again.
# Parameters: %from is the full path to an archived commitlog segment (from restore_directories)
# %to is the live commitlog directory
# Example: restore_command=/bin/cp -f %from %to
restore_command=
# Directory to scan the recovery files in.
restore_directories=
# Restore mutations created up to and including this timestamp in GMT.
# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12)
#
# Recovery will continue through the segment when the first client-supplied
# timestamp greater than this time is encountered, but only mutations less than
# or equal to this timestamp will be applied.
restore_point_in_time=
# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...)
precision=MICROSECONDS

View File

@ -0,0 +1,10 @@
###########################################################################
# jvm-clients.options #
# #
# See jvm8-clients.options and jvm11-clients.options for Java version #
# specific options. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,188 @@
###########################################################################
# jvm-server.options #
# #
# - all flags defined here will be used by cassandra to startup the JVM #
# - one flag should be specified per line #
# - lines that do not start with '-' will be ignored #
# - only static flags are accepted (no variables or parameters) #
# - dynamic flags will be appended to these on cassandra-env #
# #
# See jvm8-server.options and jvm11-server.options for Java version #
# specific options. #
###########################################################################
######################
# STARTUP PARAMETERS #
######################
# Uncomment any of the following properties to enable specific startup parameters
# In a multi-instance deployment, multiple Cassandra instances will independently assume that all
# CPU processors are available to it. This setting allows you to specify a smaller set of processors
# and perhaps have affinity.
#-Dcassandra.available_processors=number_of_processors
# The directory location of the cassandra.yaml file.
#-Dcassandra.config=directory
# Sets the initial partitioner token for a node the first time the node is started.
#-Dcassandra.initial_token=token
# Set to false to start Cassandra on a node but not have the node join the cluster.
#-Dcassandra.join_ring=true|false
# Set to false to clear all gossip state for the node on restart. Use when you have changed node
# information in cassandra.yaml (such as listen_address).
#-Dcassandra.load_ring_state=true|false
# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2.
#-Dcassandra.metricsReporterConfigFile=file
# Set the port on which the CQL native transport listens for clients. (Default: 9042)
#-Dcassandra.native_transport_port=port
# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner)
#-Dcassandra.partitioner=partitioner
# To replace a node that has died, restart a new node in its place specifying the address of the
# dead node. The new node must not have any data in its data directory, that is, it must be in the
# same state as before bootstrapping.
#-Dcassandra.replace_address=listen_address or broadcast_address of dead node
# Allow restoring specific tables from an archived commit log.
#-Dcassandra.replayList=table
# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits
# before joining the ring.
#-Dcassandra.ring_delay_ms=ms
# Set the SSL port for encrypted communication. (Default: 7001)
#-Dcassandra.ssl_storage_port=port
# Set the port for inter-node communication. (Default: 7000)
#-Dcassandra.storage_port=port
# Set the default location for the trigger JARs. (Default: conf/triggers)
#-Dcassandra.triggers_dir=directory
# For testing new compaction and compression strategies. It allows you to experiment with different
# strategies and benchmark write performance differences without affecting the production workload.
#-Dcassandra.write_survey=true
# To disable configuration via JMX of auth caches (such as those for credentials, permissions and
# roles). This will mean those config options can only be set (persistently) in cassandra.yaml
# and will require a restart for new values to take effect.
#-Dcassandra.disable_auth_caches_remote_configuration=true
# To disable dynamic calculation of the page size used when indexing an entire partition (during
# initial index build/rebuild). If set to true, the page size will be fixed to the default of
# 10000 rows per page.
#-Dcassandra.force_default_indexing_page_size=true
# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds
#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds
########################
# GENERAL JVM SETTINGS #
########################
# enable assertions. highly suggested for correct application functionality.
-ea
# disable assertions for net.openhft.** because it runs out of memory by design
# if enabled and run for more than just brief testing
-da:net.openhft...
# enable thread priorities, primarily so we can give periodic tasks
# a lower priority to avoid interfering with client workload
-XX:+UseThreadPriorities
# Enable heap-dump if there's an OOM
-XX:+HeapDumpOnOutOfMemoryError
# Per-thread stack size.
-Xss256k
# Make sure all memory is faulted and zeroed on startup.
# This helps prevent soft faults in containers and makes
# transparent hugepage allocation more effective.
-XX:+AlwaysPreTouch
# Disable biased locking as it does not benefit Cassandra.
-XX:-UseBiasedLocking
# Enable thread-local allocation blocks and allow the JVM to automatically
# resize them at runtime.
-XX:+UseTLAB
-XX:+ResizeTLAB
-XX:+UseNUMA
# http://www.evanjones.ca/jvm-mmap-pause.html
-XX:+PerfDisableSharedMem
# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See
# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
# comment out this entry to enable IPv6 support).
-Djava.net.preferIPv4Stack=true
### Debug options
# uncomment to enable flight recorder
#-XX:+UnlockCommercialFeatures
#-XX:+FlightRecorder
# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414
# uncomment to have Cassandra JVM log internal method compilation (developers only)
#-XX:+UnlockDiagnosticVMOptions
#-XX:+LogCompilation
#################
# HEAP SETTINGS #
#################
# Heap size is automatically calculated by cassandra-env based on this
# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# That is:
# - calculate 1/2 ram and cap to 1024MB
# - calculate 1/4 ram and cap to 8192MB
# - pick the max
#
# For production use you may wish to adjust this for your environment.
# If that's the case, uncomment the -Xmx and Xms options below to override the
# automatic calculation of JVM heap memory.
#
# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to
# the same value to avoid stop-the-world GC pauses during resize, and
# so that we can lock the heap in memory on startup to prevent any
# of it from being swapped out.
#-Xms4G
#-Xmx4G
# Young generation size is automatically calculated by cassandra-env
# based on this formula: min(100 * num_cores, 1/4 * heap size)
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# It is not recommended to set the young generation size if using the
# G1 GC, since that will override the target pause-time goal.
# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
#
# The example below assumes a modern 8-core+ machine for decent
# times. If in doubt, and if you do not particularly want to tweak, go
# 100 MB per physical CPU core.
#-Xmn800M
###################################
# EXPIRATION DATE OVERFLOW POLICY #
###################################
# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date:
# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00.
# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning.
# * CAP_NOWARN: same as previous, except that the client warning will not be emitted.
#
#-Dcassandra.expiration_date_overflow_policy=REJECT

View File

@ -0,0 +1,29 @@
###########################################################################
# jvm11-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 11 and newer. #
###########################################################################
###################
# JPMS SETTINGS #
###################
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
# The newline in the end of file is intentional

View File

@ -0,0 +1,103 @@
###########################################################################
# jvm11-server.options #
# #
# See jvm-server.options. This file is specific for Java 11 and newer. #
###########################################################################
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### JPMS
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
### GC logging options -- uncomment to enable
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc"
# -XX:+PrintGCDateStamps maps to decorator 'time'
#
# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace'
# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug'
# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info'
# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace'
# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace'
### Netty Options
# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable
# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has
# inferior performance and risks exceeding MaxDirectMemory
-Dio.netty.tryReflectionSetAccessible=true
# The newline in the end of file is intentional

View File

@ -0,0 +1,9 @@
###########################################################################
# jvm8-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 8 and newer. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,76 @@
###########################################################################
# jvm8-server.options #
# #
# See jvm-server.options. This file is specific for Java 8 and newer. #
###########################################################################
########################
# GENERAL JVM SETTINGS #
########################
# allows lowering thread priority without being root on linux - probably
# not necessary on Windows but doesn't harm anything.
# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html
-XX:ThreadPriorityPolicy=42
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseParNewGC
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### GC logging options -- uncomment to enable
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-XX:+PrintHeapAtGC
-XX:+PrintTenuringDistribution
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintPromotionFailure
#-XX:PrintFLSStatistics=1
#-Xloggc:/var/log/cassandra/gc.log
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=10M
# The newline in the end of file is intentional

View File

@ -0,0 +1,124 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
In order to disable debug.log, comment-out the ASYNCDEBUGLOG
appender reference in the root level section below.
-->
<configuration scan="true" scanPeriod="60 seconds">
<jmxConfigurator />
<!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
<!-- SYSTEMLOG rolling file appender to system.log (INFO level) -->
<appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<file>${cassandra.logdir}/system.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
<appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/debug.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- ASYNCLOG assynchronous appender to debug.log (all levels) -->
<appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>true</includeCallerData>
<appender-ref ref="DEBUGLOG" />
</appender>
<!-- STDOUT console appender to stdout (INFO level) -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- Uncomment below configuration (Audit Logging (FileAuditLogger) rolling file appender and Audit Logging
additivity) in order to have the log events flow through separate log file instead of system.log.
Audit Logging (FileAuditLogger) rolling file appender to audit.log -->
<!-- <appender name="AUDIT" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/audit/audit.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> -->
<!-- rollover daily -->
<!-- <fileNamePattern>${cassandra.logdir}/audit/audit.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern> -->
<!-- each file should be at most 50MB, keep 30 days worth of history, but at most 5GB -->
<!-- <maxFileSize>50MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender> -->
<!-- Audit Logging additivity to redirect audit logging events to audit/audit.log -->
<!-- <logger name="org.apache.cassandra.audit" additivity="false" level="INFO">
<appender-ref ref="AUDIT"/>
</logger> -->
<!-- Uncomment bellow and corresponding appender-ref to activate logback metrics
<appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
-->
<root level="INFO">
<appender-ref ref="SYSTEMLOG" />
<appender-ref ref="STDOUT" />
<appender-ref ref="ASYNCDEBUGLOG" /> <!-- Comment this line to disable debug.log -->
<!--
<appender-ref ref="LogbackMetrics" />
-->
</root>
<logger name="org.apache.cassandra" level="DEBUG"/>
</configuration>

11
etc/cass3/.gitignore vendored 100644
View File

@ -0,0 +1,11 @@
# ignore everything
#*
# except
# .gitignore
# !cassandra.yaml
# !jvm*
# !logback.xml
# !commitlog_archiving.properties
# !cassandra-rackdc.properties
# !cassandra-env.sh

View File

@ -0,0 +1,307 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
calculate_heap_sizes()
{
case "`uname`" in
Linux)
system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'`
system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo`
;;
FreeBSD)
system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
SunOS)
system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'`
system_cpu_cores=`psrinfo | wc -l`
;;
Darwin)
system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
*)
# assume reasonable defaults for e.g. a modern desktop or
# cheap server
system_memory_in_mb="2048"
system_cpu_cores="2"
;;
esac
# some systems like the raspberry pi don't report cores, use at least 1
if [ "$system_cpu_cores" -lt "1" ]
then
system_cpu_cores="1"
fi
# set max heap size based on the following
# max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# calculate 1/2 ram and cap to 1024MB
# calculate 1/4 ram and cap to 8192MB
# pick the max
half_system_memory_in_mb=`expr $system_memory_in_mb / 2`
quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2`
if [ "$half_system_memory_in_mb" -gt "1024" ]
then
half_system_memory_in_mb="1024"
fi
if [ "$quarter_system_memory_in_mb" -gt "8192" ]
then
quarter_system_memory_in_mb="8192"
fi
if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ]
then
max_heap_size_in_mb="$half_system_memory_in_mb"
else
max_heap_size_in_mb="$quarter_system_memory_in_mb"
fi
MAX_HEAP_SIZE="${max_heap_size_in_mb}M"
# Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size)
max_sensible_yg_per_core_in_mb="100"
max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores`
desired_yg_in_mb=`expr $max_heap_size_in_mb / 4`
if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ]
then
HEAP_NEWSIZE="${max_sensible_yg_in_mb}M"
else
HEAP_NEWSIZE="${desired_yg_in_mb}M"
fi
}
# Sets the path where logback and GC logs are written.
if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then
CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs"
fi
#GC log path has to be defined here because it needs to access CASSANDRA_HOME
if [ $JAVA_VERSION -ge 11 ] ; then
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
echo "$JVM_OPTS" | grep -qe "-[X]log:gc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
fi
else
# Java 8
echo "$JVM_OPTS" | grep -qe "-[X]loggc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log"
fi
fi
# Check what parameters were defined on jvm-server.options file to avoid conflicts
echo $JVM_OPTS | grep -q Xmn
DEFINED_XMN=$?
echo $JVM_OPTS | grep -q Xmx
DEFINED_XMX=$?
echo $JVM_OPTS | grep -q Xms
DEFINED_XMS=$?
echo $JVM_OPTS | grep -q UseConcMarkSweepGC
USING_CMS=$?
echo $JVM_OPTS | grep -q +UseG1GC
USING_G1=$?
# Override these to set the amount of memory to allocate to the JVM at
# start-up. For production use you may wish to adjust this for your
# environment. MAX_HEAP_SIZE is the total amount of memory dedicated
# to the Java heap. HEAP_NEWSIZE refers to the size of the young
# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set
# or not (if you set one, set the other).
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause
# times. If in doubt, and if you do not particularly want to tweak, go with
# 100 MB per physical CPU core.
#MAX_HEAP_SIZE="4G"
#HEAP_NEWSIZE="800M"
# Set this to control the amount of arenas per-thread in glibc
#export MALLOC_ARENA_MAX=4
# only calculate the size if it's not set manually
if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then
calculate_heap_sizes
elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then
echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)"
exit 1
fi
if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then
export MALLOC_ARENA_MAX=4
fi
# We only set -Xms and -Xmx if they were not defined on jvm-server.options file
# If defined, both Xmx and Xms should be defined together.
if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then
JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}"
JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}"
elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then
echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file."
exit 1
fi
# We only set -Xmn flag if it was not defined in jvm-server.options file
# and if the CMS GC is being used
# If defined, both Xmn and Xmx should be defined together.
if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then
echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file."
exit 1
elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}"
fi
# We fail to start if -Xmn is used with G1 GC is being used
# See comments for -Xmn in jvm-server.options
if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then
echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details."
exit 1
fi
if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark"
fi
# provides hints to the JIT compiler
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
# add the jamm javaagent
JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR
if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then
JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof"
fi
# stop the jvm on OutOfMemoryError as it can result in some data corruption
# uncomment the preferred option
# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92
# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words
# on white spaces without taking quotes into account
# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError"
# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError"
JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
# print an heap histogram on OutOfMemoryError
# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
# jmx: metrics and administration interface
#
# add this if you're having trouble connecting:
# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=<public name>"
#
# see
# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole
# for more on configuring JMX through firewalls, etc. (Short version:
# get it working with no firewall first.)
#
# Cassandra ships with JMX accessible *only* from localhost.
# To enable remote JMX connections, uncomment lines below
# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
#
if [ "x$LOCAL_JMX" = "x" ]; then
LOCAL_JMX=yes
fi
# Specifies the default port over which Cassandra will be available for
# JMX connections.
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
JMX_PORT="7199"
if [ "$LOCAL_JMX" = "yes" ]; then
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
else
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT"
# if ssl is enabled the same port cannot be used for both jmx and rmi so either
# pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins)
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT"
# turn on JMX authentication. See below for further options
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"
# jmx ssl options
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=<enabled-protocols>"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=<enabled-cipher-suites>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=<keystore-password>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=<truststore-password>"
fi
# jmx authentication and authorization options. By default, auth is only
# activated for remote connections but they can also be enabled for local only JMX
## Basic file based authn & authz
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities.
## JAAS login modules can be used for authentication by uncommenting these two properties.
## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule -
## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration
## file cassandra-jaas.config
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin"
#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config"
## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer,
## uncomment this to use it. Requires one of the two authentication options to be enabled
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy"
# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/
# directory.
# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx
# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines
# to control its listen address and port.
#MX4J_ADDRESS="127.0.0.1"
#MX4J_PORT="8081"
# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838
# for SIGAR we have to set the java.library.path
# to the location of the native libraries.
JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin"
if [ "x$MX4J_ADDRESS" != "x" ]; then
if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS"
else
JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS"
fi
fi
if [ "x$MX4J_PORT" != "x" ]; then
if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_PORT"
else
JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT"
fi
fi
JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"

View File

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# These properties are used with GossipingPropertyFileSnitch and will
# indicate the rack and dc for this node
dc= Mars
rack= West
# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
# to append a string to the EC2 region name.
#dc_suffix=
# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
# prefer_local=true
# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch.
# Options are:
# legacy : datacenter name is the part of the availability zone name preceding the last "-"
# when the zone ends in -1 and includes the number if not -1. Rack is the portion of
# the availability zone name following the last "-".
# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b;
# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER
# standard : Default value. datacenter name is the standard AWS region name, including the number.
# rack name is the region plus the availability zone letter.
# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b;
# ec2_naming_scheme=standard

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# commitlog archiving configuration. Leave blank to disable.
# Command to execute to archive a commitlog segment
# Parameters: %path => Fully qualified path of the segment to archive
# %name => Name of the commit log.
# Example: archive_command=/bin/ln %path /backup/%name
#
# Limitation: *_command= expects one command with arguments. STDOUT
# and STDIN or multiple commands cannot be executed. You might want
# to script multiple commands and add a pointer here.
archive_command=
# Command to execute to make an archived commitlog live again.
# Parameters: %from is the full path to an archived commitlog segment (from restore_directories)
# %to is the live commitlog directory
# Example: restore_command=/bin/cp -f %from %to
restore_command=
# Directory to scan the recovery files in.
restore_directories=
# Restore mutations created up to and including this timestamp in GMT.
# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12)
#
# Recovery will continue through the segment when the first client-supplied
# timestamp greater than this time is encountered, but only mutations less than
# or equal to this timestamp will be applied.
restore_point_in_time=
# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...)
precision=MICROSECONDS

View File

@ -0,0 +1,10 @@
###########################################################################
# jvm-clients.options #
# #
# See jvm8-clients.options and jvm11-clients.options for Java version #
# specific options. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,188 @@
###########################################################################
# jvm-server.options #
# #
# - all flags defined here will be used by cassandra to startup the JVM #
# - one flag should be specified per line #
# - lines that do not start with '-' will be ignored #
# - only static flags are accepted (no variables or parameters) #
# - dynamic flags will be appended to these on cassandra-env #
# #
# See jvm8-server.options and jvm11-server.options for Java version #
# specific options. #
###########################################################################
######################
# STARTUP PARAMETERS #
######################
# Uncomment any of the following properties to enable specific startup parameters
# In a multi-instance deployment, multiple Cassandra instances will independently assume that all
# CPU processors are available to it. This setting allows you to specify a smaller set of processors
# and perhaps have affinity.
#-Dcassandra.available_processors=number_of_processors
# The directory location of the cassandra.yaml file.
#-Dcassandra.config=directory
# Sets the initial partitioner token for a node the first time the node is started.
#-Dcassandra.initial_token=token
# Set to false to start Cassandra on a node but not have the node join the cluster.
#-Dcassandra.join_ring=true|false
# Set to false to clear all gossip state for the node on restart. Use when you have changed node
# information in cassandra.yaml (such as listen_address).
#-Dcassandra.load_ring_state=true|false
# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2.
#-Dcassandra.metricsReporterConfigFile=file
# Set the port on which the CQL native transport listens for clients. (Default: 9042)
#-Dcassandra.native_transport_port=port
# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner)
#-Dcassandra.partitioner=partitioner
# To replace a node that has died, restart a new node in its place specifying the address of the
# dead node. The new node must not have any data in its data directory, that is, it must be in the
# same state as before bootstrapping.
#-Dcassandra.replace_address=listen_address or broadcast_address of dead node
# Allow restoring specific tables from an archived commit log.
#-Dcassandra.replayList=table
# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits
# before joining the ring.
#-Dcassandra.ring_delay_ms=ms
# Set the SSL port for encrypted communication. (Default: 7001)
#-Dcassandra.ssl_storage_port=port
# Set the port for inter-node communication. (Default: 7000)
#-Dcassandra.storage_port=port
# Set the default location for the trigger JARs. (Default: conf/triggers)
#-Dcassandra.triggers_dir=directory
# For testing new compaction and compression strategies. It allows you to experiment with different
# strategies and benchmark write performance differences without affecting the production workload.
#-Dcassandra.write_survey=true
# To disable configuration via JMX of auth caches (such as those for credentials, permissions and
# roles). This will mean those config options can only be set (persistently) in cassandra.yaml
# and will require a restart for new values to take effect.
#-Dcassandra.disable_auth_caches_remote_configuration=true
# To disable dynamic calculation of the page size used when indexing an entire partition (during
# initial index build/rebuild). If set to true, the page size will be fixed to the default of
# 10000 rows per page.
#-Dcassandra.force_default_indexing_page_size=true
# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds
#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds
########################
# GENERAL JVM SETTINGS #
########################
# enable assertions. highly suggested for correct application functionality.
-ea
# disable assertions for net.openhft.** because it runs out of memory by design
# if enabled and run for more than just brief testing
-da:net.openhft...
# enable thread priorities, primarily so we can give periodic tasks
# a lower priority to avoid interfering with client workload
-XX:+UseThreadPriorities
# Enable heap-dump if there's an OOM
-XX:+HeapDumpOnOutOfMemoryError
# Per-thread stack size.
-Xss256k
# Make sure all memory is faulted and zeroed on startup.
# This helps prevent soft faults in containers and makes
# transparent hugepage allocation more effective.
-XX:+AlwaysPreTouch
# Disable biased locking as it does not benefit Cassandra.
-XX:-UseBiasedLocking
# Enable thread-local allocation blocks and allow the JVM to automatically
# resize them at runtime.
-XX:+UseTLAB
-XX:+ResizeTLAB
-XX:+UseNUMA
# http://www.evanjones.ca/jvm-mmap-pause.html
-XX:+PerfDisableSharedMem
# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See
# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
# comment out this entry to enable IPv6 support).
-Djava.net.preferIPv4Stack=true
### Debug options
# uncomment to enable flight recorder
#-XX:+UnlockCommercialFeatures
#-XX:+FlightRecorder
# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414
# uncomment to have Cassandra JVM log internal method compilation (developers only)
#-XX:+UnlockDiagnosticVMOptions
#-XX:+LogCompilation
#################
# HEAP SETTINGS #
#################
# Heap size is automatically calculated by cassandra-env based on this
# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# That is:
# - calculate 1/2 ram and cap to 1024MB
# - calculate 1/4 ram and cap to 8192MB
# - pick the max
#
# For production use you may wish to adjust this for your environment.
# If that's the case, uncomment the -Xmx and Xms options below to override the
# automatic calculation of JVM heap memory.
#
# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to
# the same value to avoid stop-the-world GC pauses during resize, and
# so that we can lock the heap in memory on startup to prevent any
# of it from being swapped out.
#-Xms4G
#-Xmx4G
# Young generation size is automatically calculated by cassandra-env
# based on this formula: min(100 * num_cores, 1/4 * heap size)
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# It is not recommended to set the young generation size if using the
# G1 GC, since that will override the target pause-time goal.
# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
#
# The example below assumes a modern 8-core+ machine for decent
# times. If in doubt, and if you do not particularly want to tweak, go
# 100 MB per physical CPU core.
#-Xmn800M
###################################
# EXPIRATION DATE OVERFLOW POLICY #
###################################
# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date:
# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00.
# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning.
# * CAP_NOWARN: same as previous, except that the client warning will not be emitted.
#
#-Dcassandra.expiration_date_overflow_policy=REJECT

View File

@ -0,0 +1,29 @@
###########################################################################
# jvm11-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 11 and newer. #
###########################################################################
###################
# JPMS SETTINGS #
###################
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
# The newline in the end of file is intentional

View File

@ -0,0 +1,103 @@
###########################################################################
# jvm11-server.options #
# #
# See jvm-server.options. This file is specific for Java 11 and newer. #
###########################################################################
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### JPMS
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
### GC logging options -- uncomment to enable
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc"
# -XX:+PrintGCDateStamps maps to decorator 'time'
#
# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace'
# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug'
# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info'
# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace'
# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace'
### Netty Options
# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable
# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has
# inferior performance and risks exceeding MaxDirectMemory
-Dio.netty.tryReflectionSetAccessible=true
# The newline in the end of file is intentional

View File

@ -0,0 +1,9 @@
###########################################################################
# jvm8-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 8 and newer. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,76 @@
###########################################################################
# jvm8-server.options #
# #
# See jvm-server.options. This file is specific for Java 8 and newer. #
###########################################################################
########################
# GENERAL JVM SETTINGS #
########################
# allows lowering thread priority without being root on linux - probably
# not necessary on Windows but doesn't harm anything.
# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html
-XX:ThreadPriorityPolicy=42
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseParNewGC
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### GC logging options -- uncomment to enable
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-XX:+PrintHeapAtGC
-XX:+PrintTenuringDistribution
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintPromotionFailure
#-XX:PrintFLSStatistics=1
#-Xloggc:/var/log/cassandra/gc.log
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=10M
# The newline in the end of file is intentional

View File

@ -0,0 +1,124 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
In order to disable debug.log, comment-out the ASYNCDEBUGLOG
appender reference in the root level section below.
-->
<configuration scan="true" scanPeriod="60 seconds">
<jmxConfigurator />
<!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
<!-- SYSTEMLOG rolling file appender to system.log (INFO level) -->
<appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<file>${cassandra.logdir}/system.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
<appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/debug.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- ASYNCLOG assynchronous appender to debug.log (all levels) -->
<appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>true</includeCallerData>
<appender-ref ref="DEBUGLOG" />
</appender>
<!-- STDOUT console appender to stdout (INFO level) -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- Uncomment below configuration (Audit Logging (FileAuditLogger) rolling file appender and Audit Logging
additivity) in order to have the log events flow through separate log file instead of system.log.
Audit Logging (FileAuditLogger) rolling file appender to audit.log -->
<!-- <appender name="AUDIT" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/audit/audit.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> -->
<!-- rollover daily -->
<!-- <fileNamePattern>${cassandra.logdir}/audit/audit.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern> -->
<!-- each file should be at most 50MB, keep 30 days worth of history, but at most 5GB -->
<!-- <maxFileSize>50MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender> -->
<!-- Audit Logging additivity to redirect audit logging events to audit/audit.log -->
<!-- <logger name="org.apache.cassandra.audit" additivity="false" level="INFO">
<appender-ref ref="AUDIT"/>
</logger> -->
<!-- Uncomment bellow and corresponding appender-ref to activate logback metrics
<appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
-->
<root level="INFO">
<appender-ref ref="SYSTEMLOG" />
<appender-ref ref="STDOUT" />
<appender-ref ref="ASYNCDEBUGLOG" /> <!-- Comment this line to disable debug.log -->
<!--
<appender-ref ref="LogbackMetrics" />
-->
</root>
<logger name="org.apache.cassandra" level="DEBUG"/>
</configuration>

View File

@ -0,0 +1,11 @@
# ignore everything
#*
# except
# .gitignore
# !cassandra.yaml
# !jvm*
# !logback.xml
# !commitlog_archiving.properties
# !cassandra-rackdc.properties
# !cassandra-env.sh

View File

@ -0,0 +1,307 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
calculate_heap_sizes()
{
case "`uname`" in
Linux)
system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'`
system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo`
;;
FreeBSD)
system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
SunOS)
system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'`
system_cpu_cores=`psrinfo | wc -l`
;;
Darwin)
system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'`
system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024`
system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'`
;;
*)
# assume reasonable defaults for e.g. a modern desktop or
# cheap server
system_memory_in_mb="2048"
system_cpu_cores="2"
;;
esac
# some systems like the raspberry pi don't report cores, use at least 1
if [ "$system_cpu_cores" -lt "1" ]
then
system_cpu_cores="1"
fi
# set max heap size based on the following
# max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# calculate 1/2 ram and cap to 1024MB
# calculate 1/4 ram and cap to 8192MB
# pick the max
half_system_memory_in_mb=`expr $system_memory_in_mb / 2`
quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2`
if [ "$half_system_memory_in_mb" -gt "1024" ]
then
half_system_memory_in_mb="1024"
fi
if [ "$quarter_system_memory_in_mb" -gt "8192" ]
then
quarter_system_memory_in_mb="8192"
fi
if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ]
then
max_heap_size_in_mb="$half_system_memory_in_mb"
else
max_heap_size_in_mb="$quarter_system_memory_in_mb"
fi
MAX_HEAP_SIZE="${max_heap_size_in_mb}M"
# Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size)
max_sensible_yg_per_core_in_mb="100"
max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores`
desired_yg_in_mb=`expr $max_heap_size_in_mb / 4`
if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ]
then
HEAP_NEWSIZE="${max_sensible_yg_in_mb}M"
else
HEAP_NEWSIZE="${desired_yg_in_mb}M"
fi
}
# Sets the path where logback and GC logs are written.
if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then
CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs"
fi
#GC log path has to be defined here because it needs to access CASSANDRA_HOME
if [ $JAVA_VERSION -ge 11 ] ; then
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
echo "$JVM_OPTS" | grep -qe "-[X]log:gc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=${CASSANDRA_LOG_DIR}/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760"
fi
else
# Java 8
echo "$JVM_OPTS" | grep -qe "-[X]loggc"
if [ "$?" = "1" ] ; then # [X] to prevent ccm from replacing this line
# only add -Xlog:gc if it's not mentioned in jvm-server.options file
mkdir -p ${CASSANDRA_LOG_DIR}
JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log"
fi
fi
# Check what parameters were defined on jvm-server.options file to avoid conflicts
echo $JVM_OPTS | grep -q Xmn
DEFINED_XMN=$?
echo $JVM_OPTS | grep -q Xmx
DEFINED_XMX=$?
echo $JVM_OPTS | grep -q Xms
DEFINED_XMS=$?
echo $JVM_OPTS | grep -q UseConcMarkSweepGC
USING_CMS=$?
echo $JVM_OPTS | grep -q +UseG1GC
USING_G1=$?
# Override these to set the amount of memory to allocate to the JVM at
# start-up. For production use you may wish to adjust this for your
# environment. MAX_HEAP_SIZE is the total amount of memory dedicated
# to the Java heap. HEAP_NEWSIZE refers to the size of the young
# generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set
# or not (if you set one, set the other).
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause
# times. If in doubt, and if you do not particularly want to tweak, go with
# 100 MB per physical CPU core.
#MAX_HEAP_SIZE="4G"
#HEAP_NEWSIZE="800M"
# Set this to control the amount of arenas per-thread in glibc
#export MALLOC_ARENA_MAX=4
# only calculate the size if it's not set manually
if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then
calculate_heap_sizes
elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then
echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)"
exit 1
fi
if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then
export MALLOC_ARENA_MAX=4
fi
# We only set -Xms and -Xmx if they were not defined on jvm-server.options file
# If defined, both Xmx and Xms should be defined together.
if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then
JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}"
JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}"
elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then
echo "Please set or unset -Xmx and -Xms flags in pairs on jvm-server.options file."
exit 1
fi
# We only set -Xmn flag if it was not defined in jvm-server.options file
# and if the CMS GC is being used
# If defined, both Xmn and Xmx should be defined together.
if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then
echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm-server.options file."
exit 1
elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}"
fi
# We fail to start if -Xmn is used with G1 GC is being used
# See comments for -Xmn in jvm-server.options
if [ $DEFINED_XMN -eq 0 ] && [ $USING_G1 -eq 0 ]; then
echo "It is not recommended to set -Xmn with the G1 garbage collector. See comments for -Xmn in jvm-server.options for details."
exit 1
fi
if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then
JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark"
fi
# provides hints to the JIT compiler
JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler"
# add the jamm javaagent
JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.2.jar"
# set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR
if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then
JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof"
fi
# stop the jvm on OutOfMemoryError as it can result in some data corruption
# uncomment the preferred option
# ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92
# For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words
# on white spaces without taking quotes into account
# JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError"
# JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError"
JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p"
# print an heap histogram on OutOfMemoryError
# JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true"
# jmx: metrics and administration interface
#
# add this if you're having trouble connecting:
# JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=<public name>"
#
# see
# https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole
# for more on configuring JMX through firewalls, etc. (Short version:
# get it working with no firewall first.)
#
# Cassandra ships with JMX accessible *only* from localhost.
# To enable remote JMX connections, uncomment lines below
# with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity
#
if [ "x$LOCAL_JMX" = "x" ]; then
LOCAL_JMX=yes
fi
# Specifies the default port over which Cassandra will be available for
# JMX connections.
# For security reasons, you should not expose this port to the internet. Firewall it if needed.
JMX_PORT="7199"
if [ "$LOCAL_JMX" = "yes" ]; then
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT"
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false"
else
JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT"
# if ssl is enabled the same port cannot be used for both jmx and rmi so either
# pick another value for this property or comment out to use a random port (though see CASSANDRA-7087 for origins)
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT"
# turn on JMX authentication. See below for further options
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=true"
# jmx ssl options
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.need.client.auth=true"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.protocols=<enabled-protocols>"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.ssl.enabled.cipher.suites=<enabled-cipher-suites>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStore=/path/to/keystore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.keyStorePassword=<keystore-password>"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStore=/path/to/truststore"
#JVM_OPTS="$JVM_OPTS -Djavax.net.ssl.trustStorePassword=<truststore-password>"
fi
# jmx authentication and authorization options. By default, auth is only
# activated for remote connections but they can also be enabled for local only JMX
## Basic file based authn & authz
JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password"
#JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access"
## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities.
## JAAS login modules can be used for authentication by uncommenting these two properties.
## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule -
## which delegates to the IAuthenticator configured in cassandra.yaml. See the sample JAAS configuration
## file cassandra-jaas.config
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin"
#JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config"
## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer,
## uncomment this to use it. Requires one of the two authentication options to be enabled
#JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy"
# To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/
# directory.
# See http://cassandra.apache.org/doc/latest/operating/metrics.html#jmx
# By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines
# to control its listen address and port.
#MX4J_ADDRESS="127.0.0.1"
#MX4J_PORT="8081"
# Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838
# for SIGAR we have to set the java.library.path
# to the location of the native libraries.
JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin"
if [ "x$MX4J_ADDRESS" != "x" ]; then
if [[ "$MX4J_ADDRESS" == \-Dmx4jaddress* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS"
else
JVM_OPTS="$JVM_OPTS -Dmx4jaddress=$MX4J_ADDRESS"
fi
fi
if [ "x$MX4J_PORT" != "x" ]; then
if [[ "$MX4J_PORT" == \-Dmx4jport* ]]; then
# Backward compatible with the older style #13578
JVM_OPTS="$JVM_OPTS $MX4J_PORT"
else
JVM_OPTS="$JVM_OPTS -Dmx4jport=$MX4J_PORT"
fi
fi
JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS"

View File

@ -0,0 +1,39 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# These properties are used with GossipingPropertyFileSnitch and will
# indicate the rack and dc for this node
dc= Mars
rack= West
# Add a suffix to a datacenter name. Used by the Ec2Snitch and Ec2MultiRegionSnitch
# to append a string to the EC2 region name.
#dc_suffix=
# Uncomment the following line to make this snitch prefer the internal ip when possible, as the Ec2MultiRegionSnitch does.
# prefer_local=true
# Datacenter and rack naming convention used by the Ec2Snitch and Ec2MultiRegionSnitch.
# Options are:
# legacy : datacenter name is the part of the availability zone name preceding the last "-"
# when the zone ends in -1 and includes the number if not -1. Rack is the portion of
# the availability zone name following the last "-".
# Examples: us-west-1a => dc: us-west, rack: 1a; us-west-2b => dc: us-west-2, rack: 2b;
# YOU MUST USE THIS VALUE IF YOU ARE UPGRADING A PRE-4.0 CLUSTER
# standard : Default value. datacenter name is the standard AWS region name, including the number.
# rack name is the region plus the availability zone letter.
# Examples: us-west-1a => dc: us-west-1, rack: us-west-1a; us-west-2b => dc: us-west-2, rack: us-west-2b;
# ec2_naming_scheme=standard

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,48 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# commitlog archiving configuration. Leave blank to disable.
# Command to execute to archive a commitlog segment
# Parameters: %path => Fully qualified path of the segment to archive
# %name => Name of the commit log.
# Example: archive_command=/bin/ln %path /backup/%name
#
# Limitation: *_command= expects one command with arguments. STDOUT
# and STDIN or multiple commands cannot be executed. You might want
# to script multiple commands and add a pointer here.
archive_command=
# Command to execute to make an archived commitlog live again.
# Parameters: %from is the full path to an archived commitlog segment (from restore_directories)
# %to is the live commitlog directory
# Example: restore_command=/bin/cp -f %from %to
restore_command=
# Directory to scan the recovery files in.
restore_directories=
# Restore mutations created up to and including this timestamp in GMT.
# Format: yyyy:MM:dd HH:mm:ss (2012:04:31 20:43:12)
#
# Recovery will continue through the segment when the first client-supplied
# timestamp greater than this time is encountered, but only mutations less than
# or equal to this timestamp will be applied.
restore_point_in_time=
# precision of the timestamp used in the inserts (MILLISECONDS, MICROSECONDS, ...)
precision=MICROSECONDS

View File

@ -0,0 +1,10 @@
###########################################################################
# jvm-clients.options #
# #
# See jvm8-clients.options and jvm11-clients.options for Java version #
# specific options. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,188 @@
###########################################################################
# jvm-server.options #
# #
# - all flags defined here will be used by cassandra to startup the JVM #
# - one flag should be specified per line #
# - lines that do not start with '-' will be ignored #
# - only static flags are accepted (no variables or parameters) #
# - dynamic flags will be appended to these on cassandra-env #
# #
# See jvm8-server.options and jvm11-server.options for Java version #
# specific options. #
###########################################################################
######################
# STARTUP PARAMETERS #
######################
# Uncomment any of the following properties to enable specific startup parameters
# In a multi-instance deployment, multiple Cassandra instances will independently assume that all
# CPU processors are available to it. This setting allows you to specify a smaller set of processors
# and perhaps have affinity.
#-Dcassandra.available_processors=number_of_processors
# The directory location of the cassandra.yaml file.
#-Dcassandra.config=directory
# Sets the initial partitioner token for a node the first time the node is started.
#-Dcassandra.initial_token=token
# Set to false to start Cassandra on a node but not have the node join the cluster.
#-Dcassandra.join_ring=true|false
# Set to false to clear all gossip state for the node on restart. Use when you have changed node
# information in cassandra.yaml (such as listen_address).
#-Dcassandra.load_ring_state=true|false
# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2.
#-Dcassandra.metricsReporterConfigFile=file
# Set the port on which the CQL native transport listens for clients. (Default: 9042)
#-Dcassandra.native_transport_port=port
# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner)
#-Dcassandra.partitioner=partitioner
# To replace a node that has died, restart a new node in its place specifying the address of the
# dead node. The new node must not have any data in its data directory, that is, it must be in the
# same state as before bootstrapping.
#-Dcassandra.replace_address=listen_address or broadcast_address of dead node
# Allow restoring specific tables from an archived commit log.
#-Dcassandra.replayList=table
# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits
# before joining the ring.
#-Dcassandra.ring_delay_ms=ms
# Set the SSL port for encrypted communication. (Default: 7001)
#-Dcassandra.ssl_storage_port=port
# Set the port for inter-node communication. (Default: 7000)
#-Dcassandra.storage_port=port
# Set the default location for the trigger JARs. (Default: conf/triggers)
#-Dcassandra.triggers_dir=directory
# For testing new compaction and compression strategies. It allows you to experiment with different
# strategies and benchmark write performance differences without affecting the production workload.
#-Dcassandra.write_survey=true
# To disable configuration via JMX of auth caches (such as those for credentials, permissions and
# roles). This will mean those config options can only be set (persistently) in cassandra.yaml
# and will require a restart for new values to take effect.
#-Dcassandra.disable_auth_caches_remote_configuration=true
# To disable dynamic calculation of the page size used when indexing an entire partition (during
# initial index build/rebuild). If set to true, the page size will be fixed to the default of
# 10000 rows per page.
#-Dcassandra.force_default_indexing_page_size=true
# Imposes an upper bound on hint lifetime below the normal min gc_grace_seconds
#-Dcassandra.maxHintTTL=max_hint_ttl_in_seconds
########################
# GENERAL JVM SETTINGS #
########################
# enable assertions. highly suggested for correct application functionality.
-ea
# disable assertions for net.openhft.** because it runs out of memory by design
# if enabled and run for more than just brief testing
-da:net.openhft...
# enable thread priorities, primarily so we can give periodic tasks
# a lower priority to avoid interfering with client workload
-XX:+UseThreadPriorities
# Enable heap-dump if there's an OOM
-XX:+HeapDumpOnOutOfMemoryError
# Per-thread stack size.
-Xss256k
# Make sure all memory is faulted and zeroed on startup.
# This helps prevent soft faults in containers and makes
# transparent hugepage allocation more effective.
-XX:+AlwaysPreTouch
# Disable biased locking as it does not benefit Cassandra.
-XX:-UseBiasedLocking
# Enable thread-local allocation blocks and allow the JVM to automatically
# resize them at runtime.
-XX:+UseTLAB
-XX:+ResizeTLAB
-XX:+UseNUMA
# http://www.evanjones.ca/jvm-mmap-pause.html
-XX:+PerfDisableSharedMem
# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See
# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version:
# comment out this entry to enable IPv6 support).
-Djava.net.preferIPv4Stack=true
### Debug options
# uncomment to enable flight recorder
#-XX:+UnlockCommercialFeatures
#-XX:+FlightRecorder
# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414
#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414
# uncomment to have Cassandra JVM log internal method compilation (developers only)
#-XX:+UnlockDiagnosticVMOptions
#-XX:+LogCompilation
#################
# HEAP SETTINGS #
#################
# Heap size is automatically calculated by cassandra-env based on this
# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB))
# That is:
# - calculate 1/2 ram and cap to 1024MB
# - calculate 1/4 ram and cap to 8192MB
# - pick the max
#
# For production use you may wish to adjust this for your environment.
# If that's the case, uncomment the -Xmx and Xms options below to override the
# automatic calculation of JVM heap memory.
#
# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to
# the same value to avoid stop-the-world GC pauses during resize, and
# so that we can lock the heap in memory on startup to prevent any
# of it from being swapped out.
#-Xms4G
#-Xmx4G
# Young generation size is automatically calculated by cassandra-env
# based on this formula: min(100 * num_cores, 1/4 * heap size)
#
# The main trade-off for the young generation is that the larger it
# is, the longer GC pause times will be. The shorter it is, the more
# expensive GC will be (usually).
#
# It is not recommended to set the young generation size if using the
# G1 GC, since that will override the target pause-time goal.
# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html
#
# The example below assumes a modern 8-core+ machine for decent
# times. If in doubt, and if you do not particularly want to tweak, go
# 100 MB per physical CPU core.
#-Xmn800M
###################################
# EXPIRATION DATE OVERFLOW POLICY #
###################################
# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date:
# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00.
# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning.
# * CAP_NOWARN: same as previous, except that the client warning will not be emitted.
#
#-Dcassandra.expiration_date_overflow_policy=REJECT

View File

@ -0,0 +1,29 @@
###########################################################################
# jvm11-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 11 and newer. #
###########################################################################
###################
# JPMS SETTINGS #
###################
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
# The newline in the end of file is intentional

View File

@ -0,0 +1,103 @@
###########################################################################
# jvm11-server.options #
# #
# See jvm-server.options. This file is specific for Java 11 and newer. #
###########################################################################
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### JPMS
-Djdk.attach.allowAttachSelf=true
--add-exports java.base/jdk.internal.misc=ALL-UNNAMED
--add-exports java.base/jdk.internal.ref=ALL-UNNAMED
--add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED
--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED
--add-exports java.sql/java.sql=ALL-UNNAMED
--add-opens java.base/java.lang.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.loader=ALL-UNNAMED
--add-opens java.base/jdk.internal.ref=ALL-UNNAMED
--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED
--add-opens java.base/jdk.internal.math=ALL-UNNAMED
--add-opens java.base/jdk.internal.module=ALL-UNNAMED
--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED
--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
### GC logging options -- uncomment to enable
# Java 11 (and newer) GC logging options:
# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax
# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M
#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760
# Notes for Java 8 migration:
#
# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. add a '*' after "gc"
# -XX:+PrintGCDateStamps maps to decorator 'time'
#
# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace'
# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug'
# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info'
# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace'
# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace'
### Netty Options
# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable
# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has
# inferior performance and risks exceeding MaxDirectMemory
-Dio.netty.tryReflectionSetAccessible=true
# The newline in the end of file is intentional

View File

@ -0,0 +1,9 @@
###########################################################################
# jvm8-clients.options #
# #
# See jvm-clients.options. This file is specific for Java 8 and newer. #
###########################################################################
# intentionally left empty
# The newline in the end of file is intentional

View File

@ -0,0 +1,76 @@
###########################################################################
# jvm8-server.options #
# #
# See jvm-server.options. This file is specific for Java 8 and newer. #
###########################################################################
########################
# GENERAL JVM SETTINGS #
########################
# allows lowering thread priority without being root on linux - probably
# not necessary on Windows but doesn't harm anything.
# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workaround.html
-XX:ThreadPriorityPolicy=42
#################
# GC SETTINGS #
#################
### CMS Settings
-XX:+UseParNewGC
-XX:+UseConcMarkSweepGC
-XX:+CMSParallelRemarkEnabled
-XX:SurvivorRatio=8
-XX:MaxTenuringThreshold=1
-XX:CMSInitiatingOccupancyFraction=75
-XX:+UseCMSInitiatingOccupancyOnly
-XX:CMSWaitDuration=10000
-XX:+CMSParallelInitialMarkEnabled
-XX:+CMSEdenChunksRecordAlways
## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541
-XX:+CMSClassUnloadingEnabled
### G1 Settings
## Use the Hotspot garbage-first collector.
#-XX:+UseG1GC
#-XX:+ParallelRefProcEnabled
#
## Have the JVM do less remembered set work during STW, instead
## preferring concurrent GC. Reduces p99.9 latency.
#-XX:G1RSetUpdatingPauseTimePercent=5
#
## Main G1GC tunable: lowering the pause target will lower throughput and vise versa.
## 200ms is the JVM default and lowest viable setting
## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml.
#-XX:MaxGCPauseMillis=500
## Optional G1 Settings
# Save CPU time on large (>= 16GB) heaps by delaying region scanning
# until the heap is 70% full. The default in Hotspot 8u40 is 40%.
#-XX:InitiatingHeapOccupancyPercent=70
# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores.
# Otherwise equal to the number of cores when 8 or less.
# Machines with > 10 cores should try setting these to <= full cores.
#-XX:ParallelGCThreads=16
# By default, ConcGCThreads is 1/4 of ParallelGCThreads.
# Setting both to the same value can reduce STW durations.
#-XX:ConcGCThreads=16
### GC logging options -- uncomment to enable
-XX:+PrintGCDetails
-XX:+PrintGCDateStamps
-XX:+PrintHeapAtGC
-XX:+PrintTenuringDistribution
-XX:+PrintGCApplicationStoppedTime
-XX:+PrintPromotionFailure
#-XX:PrintFLSStatistics=1
#-Xloggc:/var/log/cassandra/gc.log
-XX:+UseGCLogFileRotation
-XX:NumberOfGCLogFiles=10
-XX:GCLogFileSize=10M
# The newline in the end of file is intentional

View File

@ -0,0 +1,124 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<!--
In order to disable debug.log, comment-out the ASYNCDEBUGLOG
appender reference in the root level section below.
-->
<configuration scan="true" scanPeriod="60 seconds">
<jmxConfigurator />
<!-- No shutdown hook; we run it ourselves in StorageService after shutdown -->
<!-- SYSTEMLOG rolling file appender to system.log (INFO level) -->
<appender name="SYSTEMLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<file>${cassandra.logdir}/system.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/system.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- DEBUGLOG rolling file appender to debug.log (all levels) -->
<appender name="DEBUGLOG" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/debug.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- rollover daily -->
<fileNamePattern>${cassandra.logdir}/debug.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern>
<!-- each file should be at most 50MB, keep 7 days worth of history, but at most 5GB -->
<maxFileSize>50MB</maxFileSize>
<maxHistory>7</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- ASYNCLOG assynchronous appender to debug.log (all levels) -->
<appender name="ASYNCDEBUGLOG" class="ch.qos.logback.classic.AsyncAppender">
<queueSize>1024</queueSize>
<discardingThreshold>0</discardingThreshold>
<includeCallerData>true</includeCallerData>
<appender-ref ref="DEBUGLOG" />
</appender>
<!-- STDOUT console appender to stdout (INFO level) -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<filter class="ch.qos.logback.classic.filter.ThresholdFilter">
<level>INFO</level>
</filter>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- Uncomment below configuration (Audit Logging (FileAuditLogger) rolling file appender and Audit Logging
additivity) in order to have the log events flow through separate log file instead of system.log.
Audit Logging (FileAuditLogger) rolling file appender to audit.log -->
<!-- <appender name="AUDIT" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${cassandra.logdir}/audit/audit.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy"> -->
<!-- rollover daily -->
<!-- <fileNamePattern>${cassandra.logdir}/audit/audit.log.%d{yyyy-MM-dd}.%i.zip</fileNamePattern> -->
<!-- each file should be at most 50MB, keep 30 days worth of history, but at most 5GB -->
<!-- <maxFileSize>50MB</maxFileSize>
<maxHistory>30</maxHistory>
<totalSizeCap>5GB</totalSizeCap>
</rollingPolicy>
<encoder>
<pattern>%-5level [%thread] %date{ISO8601} %F:%L - %msg%n</pattern>
</encoder>
</appender> -->
<!-- Audit Logging additivity to redirect audit logging events to audit/audit.log -->
<!-- <logger name="org.apache.cassandra.audit" additivity="false" level="INFO">
<appender-ref ref="AUDIT"/>
</logger> -->
<!-- Uncomment bellow and corresponding appender-ref to activate logback metrics
<appender name="LogbackMetrics" class="com.codahale.metrics.logback.InstrumentedAppender" />
-->
<root level="INFO">
<appender-ref ref="SYSTEMLOG" />
<appender-ref ref="STDOUT" />
<appender-ref ref="ASYNCDEBUGLOG" /> <!-- Comment this line to disable debug.log -->
<!--
<appender-ref ref="LogbackMetrics" />
-->
</root>
<logger name="org.apache.cassandra" level="DEBUG"/>
</configuration>

View File

@ -0,0 +1,4 @@
FROM python:3
ADD exercise2_3.py /
RUN pip install cassandra-driver
CMD [ "python3", "./exercise2_3.py" ]

View File

@ -0,0 +1,24 @@
from collections import OrderedDict
from collections import Counter
from cassandra.cluster import Cluster
cluster = Cluster(['172.20.0.6', '172.20.0.7', '172.20.0.8'])
session = cluster.connect('twitter')
# 2. Find the 100 accounts with the most followers
rows = session.execute('SELECT user_id,follower_len FROM twitter.most_follows;')
sorted_rows = dict(sorted(dict(rows).items(), key=lambda item: item[1]))
most_follows = list(sorted_rows.keys())[-100:]
print("Top 100 most followed accounts:",most_follows)
print("________________________________________________________________")
# Finding the 100 accounts that follow the most of the accounts found in 2).
followed_accs = session.execute(f'SELECT follower_id FROM twitter.follower_relation WHERE user_id IN {tuple(most_follows)};')
# followed_accs = list(followed_accs.keys())
_list = list()
for row in followed_accs:
_list.append(row[0])
followed_top_100 = Counter(_list).most_common(100)
print("100 accounts that follow the most of the accounts found in 2)",followed_top_100)

View File

@ -0,0 +1,44 @@
// 1. Auflisten der Posts, die von einem Account gemacht wurden, bzw. ihm zugeordnet wurden
SELECT content FROM twitter.tweets where author_id = X;
// or
SELECT * FROM twitter.tweets WHERE author='katyperry' ALLOW FILTERING;
// 2 .Finden der 100 Accounts mit den meisten Followern
// VIEW not possible -> no group by allowed AND YOU CANT SORT DATA https://stackoverflow.com/a/14463098
// CREATE MATERIALIZED VIEW twitter.most_follows AS SELECT user_id, count(user_id) as len from twitter.user group by user_id PRIMARY KEY (user_id,len);
// # Use of spark cluster or trigger to update tables to add the lenght of follower ids. -> New Data Schema
// # not supported on counter table -> CREATE MATERIALIZED VIEW twitter.most_follows AS SELECT user_id, follower_len from twitter.user_stats PRIMARY KEY ((follower_len,user_id));
// change to int
CREATE MATERIALIZED VIEW twitter.most_follows AS SELECT user_id, follower_len from twitter.user_stats WHERE user_id is not null and follower_len is not null PRIMARY KEY (follower_len,user_id);
// 3. Finden der 100 Accounts, die den meisten der Accounts folgen, die in 2) gefunden wurden
// 4.
// die Anzahl der Follower & die Anzahl der verfolgten Accounts
SELECT * FROM twitter.user_stats WHERE user_id = 14378300;
// 25 neusten
CREATE MATERIALIZED VIEW twitter.start_view_new AS SELECT user_id_x,follower_id,date_time,name,author,content,id FROM twitter.user WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND date_time IS NOT NULL AND id IS NOT NULL PRIMARY KEY ((user_id_x),date_time,follower_id,id);
SELECT * FROM twitter.start_view WHERE user_id_x = 14378300 ORDER BY date_time DESC LIMIT 25;
// 25 beliebtesten
CREATE MATERIALIZED VIEW twitter.start_view_like AS SELECT user_id_x,follower_id,number_of_likes,date_time,author,name,content,id FROM twitter.user WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
SELECT * FROM twitter.start_view_like WHERE user_id_x = 14378300 ORDER BY number_of_likes DESC LIMIT 25;
// 5. Fan-Out as VIEW ?
// # InvalidRequest: Error from server: code=2200 [Invalid query] message="Unknown column 'user_id_x' referenced in PRIMARY KEY for materialized view 'start_view_biber'"
CREATE MATERIALIZED VIEW twitter.start_view_biber AS SELECT follower_id,number_of_likes,date_time,author,name,content,id FROM twitter.user WHERE user_id_x IS NOT NULL AND user_id_x=172883064 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
// # order by need partion key in WHERE
SELECT * from twitter.start_view_biber WHERE user_id_x=14378300 ORDER BY number_of_likes DESC LIMIT 25;
// INSERT new tweet
INSERT INTO twitter.user(user_id_x,follower_id,name,author ,content ,country ,date_time ,id ,language ,latitude ,longitude ,number_of_likes ,number_of_shares ,user_id_y )
VALUES(14378300, 261047860, 'Justin','NoName', 'Hallo there BDEA','DE', dateof(now()), 'NoID', 'text', 100, 100, 10000000, 0, 0);
// 6. Auflisten Post die ein geg. Wort enthalten (falls möglich auch mit UND-Verknüpfung mehrerer Worte)
// # Enables SASI index creation on this node && SASI indexes are considered experimental and are not recommended for production use.
// # InvalidRequest: Error from server: code=2200 [Invalid query] message="Secondary indexes on materialized views aren't supported"
// # SyntaxException: line 1:7 no viable alternative at input 'SEARCH' ([CREATE] SEARCH...)
CREATE CUSTOM INDEX search_in ON twitter.tweets (content) USING 'org.apache.cassandra.index.sasi.SASIIndex' WITH OPTIONS = { 'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', 'case_sensitive': 'false' };
SELECT * from twitter.tweets WHERE content LIKE '%This%' limit 25;

View File

@ -0,0 +1,83 @@
from ctypes import sizeof
from datetime import date
import pandas as pd
import numpy as np
# read follwer
df_follower = pd.read_csv("cassandra/startup/data/twitter_combined_orginal.txt", sep=' ', names=["user_id","follower_id"])
df_follower = df_follower.drop_duplicates() # drop duplicate follows in data
df_user_stats = df_follower.groupby('user_id')['follower_id'].size().reset_index(name='follower_len') # group user_id to follows -> user is following
df_user_stats['follows_len'] = df_follower.groupby('follower_id')['user_id'].size().reset_index(name='follows_len')['follows_len'] # group follows to user_id -> user are followed by
df_user_follow_list = df_follower.groupby('user_id')['follower_id'].apply(list).reset_index(name='follows') # group follows to user_id -> user are followed by
df_user_follow_list['follower'] = df_follower.groupby('follower_id')['user_id'].apply(list).reset_index(name='follower')['follower'] # group follows to user_id -> user are followed by
df_user_stats['follows_len'] = df_follower.groupby('follower_id')['user_id'].size().reset_index(name='follows_len')['follows_len'] # group follows to user_id -> user are followed by
df_follower.to_csv("cassandra/startup/data/user_follower_relation.csv",index=False)
# change comment section in twitter.csv because \n and \r are not functionally quoteted
df_tweet = pd.read_csv("cassandra/startup/data/tweets_orginal.csv")
df_tweet['content'] = df_tweet["content"].str.replace("\n","\\n")
df_tweet['content'] = df_tweet["content"].str.replace("\r","\\r")
df_tweet['content'] = df_tweet["content"].str.replace(',',"\,")
### map user_ids to the tweets
# list of distinct authors
authors = df_tweet['author'].drop_duplicates().reset_index(drop=True)
# sort user after the most followers and get random authors ids
df_follower['len'] = df_follower.groupby('user_id')['follower_id'].apply(list).str.len()
df_user_id = df_follower.sort_values(by='len', ascending=False)['user_id'][:100].sample(n=len(authors)).reset_index(drop=True)
df_follower = df_follower.drop(columns='len')
# merge the author_names and id into the user table
authors = pd.DataFrame(authors)
authors['user_id'] = df_user_id
df_tweet['user_id'] = df_tweet.merge(authors, on='author').user_id
df_follower['name'] = df_follower.merge(authors,how='left', on='user_id')['author'].values
tweets_len = df_tweet.groupby('user_id')['id'].size().reset_index(name='tweets_len').astype(int)
df_user_stats['tweets_len'] = df_user_stats.merge(tweets_len,how='left', on='user_id')['tweets_len'].values
# add liked from user to each tweet
user_ids = pd.DataFrame(df_follower['user_id'].drop_duplicates())
num_of_ids_missing = int(df_tweet['number_of_likes'].max()/10) - user_ids.values.size
# generate new user if most liked tweet are more than the user number
if num_of_ids_missing > 0:
a = np.empty((num_of_ids_missing,2,))
a[:] = np.nan
new_user_ids = np.arange(user_ids.max()+1,user_ids.max()+1+num_of_ids_missing)
a[:,0] = new_user_ids
user_ids = pd.concat([pd.DataFrame(a, columns=user_ids.columns), user_ids], ignore_index=True)
user_ids['user_id'] = user_ids['user_id'].astype(np.uint32)
# sort random user_id to all tweets
tweet_ids = pd.DataFrame(df_tweet['id'].drop_duplicates())
tweet_ids['liked_from'] = df_tweet.apply(lambda row: list(np.random.choice(user_ids['user_id'],size = int(row['number_of_likes']/10))),axis=1)
tweet_ids['date_time'] = df_tweet['date_time']
# split into separte files
partitions = 7
dfs = np.array_split(tweet_ids, partitions)
for i,df in enumerate(dfs):
df.to_csv("cassandra/startup/data/tweet_liked/tweet"+str(i)+".txt",index=False)
# save the updated data
df_tweet.to_csv("cassandra/startup/data/tweets.csv",index=False)
df_user_stats.to_csv("cassandra/startup/data/user_stats.txt",index=False)
df_user_follow_list.to_csv("cassandra/startup/data/user_follows.txt",index=False)
# save realtionship betweet user_ID, follower_ID and tweet_ID
relation_list = list()
for i,row in df_tweet.iterrows():
df = df_tweet.iloc[[i]]
relation_list.append(df_follower.merge(df,left_on='follower_id',right_on='user_id'))
if i % 1000 == 0:
df_follower_new = pd.concat(relation_list)
df_follower_new.to_csv("cassandra/startup/data/relations/relation"+str(i)+".txt",index=False)
relation_list = list()

View File

@ -0,0 +1,11 @@
#!/bin/bash
cqlsh cass1 -f /tmp/startup/setup/setup_db_1.cql
cqlsh cass1 -f /tmp/startup/setup/setup_db_2.cql
# NOTE : Quote it else use array to avoid problems #
# <stdin>:1:Failed to import 20 rows: WriteTimeout - Error from server: code=1100 [Coordinator node timed out waiting for replica nodes' responses] message="Operation timed out - received only 0 responses." info={'consistency': 'ONE', 'required_responses': 1, 'received_responses': 0, 'write_type': 'UNLOGGED_BATCH'}, will retry later, attempt 1 of 5
TWEET_FILES="/tmp/startup/data/relations/*"
for f in $TWEET_FILES; do
cqlsh cass1 -e "COPY twitter.user (user_id_x,follower_id,name,author,content,country,date_time,id,language,latitude,longitude,number_of_likes,number_of_shares,user_id_y) FROM '$f' WITH DELIMITER=',' AND HEADER=TRUE;"
done
cqlsh cass1 -f /tmp/startup/setup/setup_views.cql

View File

@ -0,0 +1,25 @@
CREATE KEYSPACE twitter WITH replication = {'class':'NetworkTopologyStrategy', 'replication_factor' : 3};
CREATE TABLE twitter.tweets(
author text,
content text,
country text,
date_time timestamp,
id text,
language text,
latitude double,
longitude double,
number_of_likes int,
number_of_shares int,
author_id bigint,
PRIMARY KEY ((author_id), date_time, id)
) WITH compaction = {'class' : 'LeveledCompactionStrategy'};
CREATE TABLE twitter.follower_relation(
user_id int,
follower_id int,
PRIMARY KEY (user_id,follower_id)
) WITH compaction = {'class' : 'LeveledCompactionStrategy'};
COPY twitter.tweets (author,content,country,date_time,id,language,latitude,longitude,number_of_likes,number_of_shares, author_id) FROM '/tmp/startup/data/tweets.csv' WITH DELIMITER=',' AND HEADER=TRUE;
COPY twitter.follower_relation (user_id,follower_id) FROM '/tmp/startup/data/user_follower_relation.csv' WITH DELIMITER=',' AND HEADER=TRUE;

View File

@ -0,0 +1,37 @@
CREATE TABLE twitter.user(
user_id_x int,
follower_id int,
name text,
author text,
content text,
country text,
date_time timestamp,
id text,
language text,
latitude double,
longitude double,
number_of_likes int,
number_of_shares int,
user_id_y int,
PRIMARY KEY((user_id_x,follower_id,id))
) WITH compaction = {'class' : 'LeveledCompactionStrategy'};
CREATE TABLE twitter.tweet_liked_from(
tweet_id text,
date_time timestamp,
liked_from set<int>,
PRIMARY KEY(tweet_id,date_time)
) WITH compaction = {'class' : 'LeveledCompactionStrategy'};
CREATE TABLE twitter.user_stats(
user_id bigint,
follower_len int,
follows_len int,
tweet_len int,
PRIMARY KEY(user_id)
) WITH compaction = {'class' : 'LeveledCompactionStrategy'};
COPY twitter.user_stats (user_id,follower_len,follows_len,tweet_len) FROM '/tmp/startup/data/user_stats.txt' WITH DELIMITER=',' AND HEADER=TRUE;

View File

@ -0,0 +1,5 @@
#!/bin/bash
LIKED_FROM_FILES="/tmp/startup/data/tweet_liked/*"
for f in $LIKED_FROM_FILES; do
cqlsh cass1 -e "COPY twitter.tweet_liked_from (tweet_id,liked_from,date_time) FROM '$f' WITH DELIMITER=',' AND HEADER=TRUE;"
done

View File

@ -0,0 +1,28 @@
CREATE MATERIALIZED VIEW twitter.start_view_like AS
SELECT user_id_x,follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id
FROM twitter.user
WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
CREATE MATERIALIZED VIEW twitter.start_view_new AS
SELECT user_id_x,follower_id,date_time,number_of_likes,number_of_shares,name,author,content,id
FROM twitter.user
WHERE user_id_x IS NOT NULL AND follower_id IS NOT NULL AND date_time IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),date_time,follower_id,id);
CREATE MATERIALIZED VIEW twitter.start_view_taylor AS
SELECT follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id,user_id_x
FROM twitter.user
WHERE user_id_x IS NOT NULL AND user_id_x=233248636 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
CREATE MATERIALIZED VIEW twitter.start_view_user1 AS
SELECT follower_id,number_of_likes,number_of_shares,date_time,author,name,content,id,user_id_x
FROM twitter.user
WHERE user_id_x IS NOT NULL AND user_id_x=172883064 AND follower_id IS NOT NULL AND number_of_likes IS NOT NULL AND id IS NOT NULL
PRIMARY KEY ((user_id_x),number_of_likes,follower_id,id);
CREATE CUSTOM INDEX search_in
ON twitter.tweets (content)
USING 'org.apache.cassandra.index.sasi.SASIIndex'
WITH OPTIONS = { 'mode': 'CONTAINS', 'analyzer_class': 'org.apache.cassandra.index.sasi.analyzer.NonTokenizingAnalyzer', 'case_sensitive': 'false' };